Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[red-knot] Combine terminal statement support with statically known branches #15817

Merged
merged 36 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
02c4798
Add ALWAYS_FALSE
dcreager Jan 29, 2025
2c0d577
Mark return states
dcreager Jan 29, 2025
e198806
Mark live bindings as non-visible when flow is unreachable
dcreager Jan 29, 2025
23b366d
Revert "Mark return states"
dcreager Jan 30, 2025
38f3d99
Handle final return statement specially
dcreager Jan 30, 2025
5c1918a
Customizable binding visibility
dcreager Jan 30, 2025
7504fb7
Reachability is now a vis constraint, not a bool
dcreager Jan 30, 2025
ee6f253
Fix ternary AND logic
dcreager Jan 30, 2025
6b53554
These are now unresolved I guess?
dcreager Jan 30, 2025
bb11cf8
clippy
dcreager Jan 30, 2025
a2ef702
Normalize negations of ALWAYS_{TRUE,FALSE}
dcreager Jan 30, 2025
1c42a2b
scope_start_visibility _is_ reachability
dcreager Jan 30, 2025
289c0c6
No, bindings are always visible
dcreager Jan 30, 2025
8b0899f
TODO for `raise`/`else` unreachability
dcreager Jan 30, 2025
9cd8e68
Fix test failure
dcreager Jan 30, 2025
1a80f81
Expected test case change
dcreager Jan 30, 2025
f71325c
Try to skip simplification by checking scope_start_visibility
dcreager Jan 30, 2025
0e06012
And try via a separate `always_reachable` boolean
dcreager Jan 30, 2025
b3b4577
Add AlwaysFalse as its own constraint variant
dcreager Jan 30, 2025
46b1ec2
Update always_reachable on merge correctly
dcreager Jan 30, 2025
999188a
Try checking if reachability contains AlwaysFalse in syntax tree
dcreager Jan 30, 2025
0f278da
But that doesn't work either
dcreager Jan 30, 2025
0fb6a74
Merge branch 'main' into dcreager/static-terminal
dcreager Feb 4, 2025
c882a95
This is working again
dcreager Feb 4, 2025
33db6f1
Remove static bool
dcreager Feb 4, 2025
b49916d
Remove moot comment
dcreager Feb 4, 2025
c80f27e
And another
dcreager Feb 4, 2025
0d50385
New bindings are visible only if control flow is reachable
dcreager Feb 4, 2025
c42490c
Add xfail for RET503
dcreager Feb 4, 2025
26f842b
Update terminal statement comment
dcreager Feb 5, 2025
e5b6c4a
Add shorter example for bindings after terminal statement
dcreager Feb 5, 2025
00e236f
Add back debug derive
dcreager Feb 5, 2025
1d93650
Add TODO to function symbol comment
dcreager Feb 5, 2025
ae83741
Wrap try examples in functions to bound reachability
dcreager Feb 5, 2025
f13a6a6
Spelling typo
dcreager Feb 5, 2025
7423de2
Add TODO for unreachable code example
dcreager Feb 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -452,12 +452,16 @@ def raise_in_both_branches(cond: bool):
# Exceptions can occur anywhere, so "before" and "raise" are valid possibilities
reveal_type(x) # revealed: Literal["before", "raise1", "raise2"]
else:
# This should not be included below, but we do not currently model that control flows that
# terminate via `raise` cannot enter the `else` clause.
carljm marked this conversation as resolved.
Show resolved Hide resolved
x = "unreachable"
finally:
# Exceptions can occur anywhere, so "before" and "raise" are valid possibilities
reveal_type(x) # revealed: Literal["before", "raise1", "raise2"]
# TODO: Literal["before", "raise1", "raise2"]
reveal_type(x) # revealed: Literal["before", "raise1", "raise2", "unreachable"]
# Exceptions can occur anywhere, so "before" and "raise" are valid possibilities
reveal_type(x) # revealed: Literal["before", "raise1", "raise2"]
# TODO: Literal["before", "raise1", "raise2"]
reveal_type(x) # revealed: Literal["before", "raise1", "raise2", "unreachable"]

def raise_in_nested_then_branch(cond1: bool, cond2: bool):
x = "before"
Expand Down Expand Up @@ -636,5 +640,5 @@ def _(cond: bool):
return

# TODO: Literal["a"]
reveal_type(x) # revealed: Literal["a", "b"]
reveal_type(x) # revealed: Literal["a"]
dcreager marked this conversation as resolved.
Show resolved Hide resolved
```
24 changes: 21 additions & 3 deletions crates/red_knot_python_semantic/src/semantic_index/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ impl<'db> SemanticIndexBuilder<'db> {

let file_scope_id = self.scopes.push(scope);
self.symbol_tables.push(SymbolTableBuilder::default());
self.use_def_maps.push(UseDefMapBuilder::default());
self.use_def_maps.push(UseDefMapBuilder::new(self.db));
let ast_id_scope = self.ast_ids.push(AstIdsBuilder::default());

let scope_id = ScopeId::new(self.db, self.file, file_scope_id, countme::Count::default());
Expand Down Expand Up @@ -356,7 +356,7 @@ impl<'db> SemanticIndexBuilder<'db> {
constraint: ScopedVisibilityConstraintId,
) -> ScopedVisibilityConstraintId {
self.current_use_def_map_mut()
.record_visibility_constraint(VisibilityConstraint::VisibleIfNot(constraint))
.record_negated_visibility_constraint_id(constraint)
}

/// Records a visibility constraint by applying it to all live bindings and declarations.
Expand Down Expand Up @@ -706,7 +706,25 @@ where

builder.declare_parameters(parameters);

builder.visit_body(body);
// HACK: Visit the function body, but treat the last statement specially if
// it is a return. If it is, this would cause all definitions in the
// function to be marked as non-visible with our current treatment of
// terminal statements. Since we currently model the externally visible
// definitions in a function scope as the set of bindings that are visible
// at the end of the body, we then consider this function to have no
// externally visible definitions. To get around this, we take a flow
// snapshot just before processing the return statement, and use _that_ as
// the "end-of-body" state that we resolve external references against.
dcreager marked this conversation as resolved.
Show resolved Hide resolved
if let Some((last_stmt, first_stmts)) = body.split_last() {
builder.visit_body(first_stmts);
let pre_return_state = matches!(last_stmt, ast::Stmt::Return(_))
.then(|| builder.flow_snapshot());
builder.visit_stmt(last_stmt);
if let Some(pre_return_state) = pre_return_state {
builder.flow_restore(pre_return_state);
}
}

builder.pop_scope()
},
);
Expand Down
54 changes: 33 additions & 21 deletions crates/red_knot_python_semantic/src/semantic_index/use_def.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ use crate::semantic_index::definition::Definition;
use crate::semantic_index::symbol::ScopedSymbolId;
use crate::semantic_index::use_def::symbol_state::DeclarationIdWithConstraint;
use crate::visibility_constraints::{VisibilityConstraint, VisibilityConstraints};
use crate::Db;
use ruff_index::IndexVec;
use rustc_hash::FxHashMap;

Expand Down Expand Up @@ -476,11 +477,11 @@ impl std::iter::FusedIterator for DeclarationsIterator<'_, '_> {}
pub(super) struct FlowSnapshot {
symbol_states: IndexVec<ScopedSymbolId, SymbolState>,
scope_start_visibility: ScopedVisibilityConstraintId,
reachable: bool,
}

#[derive(Debug)]
dcreager marked this conversation as resolved.
Show resolved Hide resolved
pub(super) struct UseDefMapBuilder<'db> {
db: &'db dyn Db,

/// Append-only array of [`Definition`].
all_definitions: IndexVec<ScopedDefinitionId, Option<Definition<'db>>>,

Expand All @@ -504,28 +505,24 @@ pub(super) struct UseDefMapBuilder<'db> {

/// Currently live bindings and declarations for each symbol.
symbol_states: IndexVec<ScopedSymbolId, SymbolState>,

reachable: bool,
carljm marked this conversation as resolved.
Show resolved Hide resolved
}

impl Default for UseDefMapBuilder<'_> {
fn default() -> Self {
impl<'db> UseDefMapBuilder<'db> {
pub(super) fn new(db: &'db dyn Db) -> Self {
Self {
db,
all_definitions: IndexVec::from_iter([None]),
all_constraints: IndexVec::new(),
visibility_constraints: VisibilityConstraints::default(),
scope_start_visibility: ScopedVisibilityConstraintId::ALWAYS_TRUE,
bindings_by_use: IndexVec::new(),
definitions_by_definition: FxHashMap::default(),
symbol_states: IndexVec::new(),
reachable: true,
}
}
}

impl<'db> UseDefMapBuilder<'db> {
pub(super) fn mark_unreachable(&mut self) {
self.reachable = false;
self.record_visibility_constraint_id(ScopedVisibilityConstraintId::ALWAYS_FALSE);
}

pub(super) fn add_symbol(&mut self, symbol: ScopedSymbolId) {
Expand Down Expand Up @@ -581,6 +578,15 @@ impl<'db> UseDefMapBuilder<'db> {
.add_and_constraint(self.scope_start_visibility, constraint);
}

pub(super) fn record_negated_visibility_constraint_id(
&mut self,
constraint: ScopedVisibilityConstraintId,
) -> ScopedVisibilityConstraintId {
let new_constraint_id = self.visibility_constraints.add_not_constraint(constraint);
self.record_visibility_constraint_id(new_constraint_id);
new_constraint_id
}

pub(super) fn record_visibility_constraint(
&mut self,
constraint: VisibilityConstraint<'db>,
Expand Down Expand Up @@ -664,7 +670,6 @@ impl<'db> UseDefMapBuilder<'db> {
FlowSnapshot {
symbol_states: self.symbol_states.clone(),
scope_start_visibility: self.scope_start_visibility,
reachable: self.reachable,
}
}

Expand All @@ -687,21 +692,31 @@ impl<'db> UseDefMapBuilder<'db> {
num_symbols,
SymbolState::undefined(self.scope_start_visibility),
);

self.reachable = snapshot.reachable;
}

/// Merge the given snapshot into the current state, reflecting that we might have taken either
/// path to get here. The new state for each symbol should include definitions from both the
/// prior state and the snapshot.
pub(super) fn merge(&mut self, snapshot: FlowSnapshot) {
// Unreachable snapshots should not be merged: If the current snapshot is unreachable, it
// should be completely overwritten by the snapshot we're merging in. If the other snapshot
// is unreachable, we should return without merging.
if !snapshot.reachable {
// As an optimization, if we know statically that either of the snapshots is always
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Commenting out these two if clauses is how we verify that this is truly an optimization — we should get the same results for the tests with and without it

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And I verified that we can indeed remove these checks and all tests pass! I'm not seeing a detectable performance improvement in the benchmark from including these lines; perhaps that just suggests conditional terminals aren't common enough for it to show up? It definitely seems like this should be faster in cases where it does apply.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not seeing a detectable performance improvement in the benchmark from including these lines

I think it might be that the merge step is now faster: if the other snapshot's reachability is ALWAYS_FALSE, then the visibility of all of its bindings should also be ALWAYS_FALSE. (I think there were cases before TDD normalization where we wouldn't be able to see that in the structure of the visibility constraint.) Merge will iterate through all of the bindings and AND their visibility constraints, but ANDing with ALWAYS_FALSE is one of the fast-path returns.

To be clear, it's a hunch — I haven't backed any of ☝️ with data!

// unreachable, we can leave it out of the merged result entirely. Note that we cannot
// perform any type inference at this point, so this is largely limited to unreachability
// via terminal statements. If a flow's reachability depends on an expression in the code,
// we will include the flow in the merged result; the visibility constraints of its
// bindings will include this reachability condition, so that later during type inference,
// we can determine whether any particular binding is non-visible due to unreachability.
if self
.visibility_constraints
.evaluate_without_inference(self.db, snapshot.scope_start_visibility)
.is_always_false()
{
return;
}
if !self.reachable {
if self
.visibility_constraints
.evaluate_without_inference(self.db, self.scope_start_visibility)
.is_always_false()
{
self.restore(snapshot);
return;
}
Expand All @@ -727,9 +742,6 @@ impl<'db> UseDefMapBuilder<'db> {
self.scope_start_visibility = self
.visibility_constraints
.add_or_constraint(self.scope_start_visibility, snapshot.scope_start_visibility);

// Both of the snapshots are reachable, so the merged result is too.
self.reachable = true;
}

pub(super) fn finish(mut self) -> UseDefMap<'db> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,12 @@ impl ScopedVisibilityConstraintId {
/// present at index 0.
pub(crate) const ALWAYS_TRUE: ScopedVisibilityConstraintId =
ScopedVisibilityConstraintId::from_u32(0);

/// A special ID that is used for an "always false" / "never visible" constraint.
/// When we create a new [`VisibilityConstraints`] object, this constraint is always
/// present at index 1.
pub(crate) const ALWAYS_FALSE: ScopedVisibilityConstraintId =
ScopedVisibilityConstraintId::from_u32(1);
}

const INLINE_VISIBILITY_CONSTRAINTS: usize = 4;
Expand Down
65 changes: 55 additions & 10 deletions crates/red_knot_python_semantic/src/visibility_constraints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,10 @@ pub(crate) struct VisibilityConstraints<'db> {
impl Default for VisibilityConstraints<'_> {
fn default() -> Self {
Self {
constraints: IndexVec::from_iter([VisibilityConstraint::AlwaysTrue]),
constraints: IndexVec::from_iter([
VisibilityConstraint::AlwaysTrue,
VisibilityConstraint::VisibleIfNot(ScopedVisibilityConstraintId::ALWAYS_TRUE),
]),
}
}
}
Expand All @@ -199,11 +202,33 @@ impl<'db> VisibilityConstraints<'db> {
self.constraints.push(constraint)
}

pub(crate) fn add_not_constraint(
&mut self,
a: ScopedVisibilityConstraintId,
) -> ScopedVisibilityConstraintId {
if a == ScopedVisibilityConstraintId::ALWAYS_TRUE {
ScopedVisibilityConstraintId::ALWAYS_FALSE
} else if a == ScopedVisibilityConstraintId::ALWAYS_FALSE {
ScopedVisibilityConstraintId::ALWAYS_TRUE
} else {
self.add(VisibilityConstraint::VisibleIfNot(a))
}
}

pub(crate) fn add_or_constraint(
&mut self,
a: ScopedVisibilityConstraintId,
b: ScopedVisibilityConstraintId,
) -> ScopedVisibilityConstraintId {
if a == ScopedVisibilityConstraintId::ALWAYS_FALSE {
return b;
} else if b == ScopedVisibilityConstraintId::ALWAYS_FALSE {
return a;
} else if a == ScopedVisibilityConstraintId::ALWAYS_TRUE
|| b == ScopedVisibilityConstraintId::ALWAYS_TRUE
{
return ScopedVisibilityConstraintId::ALWAYS_TRUE;
}
match (&self.constraints[a], &self.constraints[b]) {
(_, VisibilityConstraint::VisibleIfNot(id)) if a == *id => {
ScopedVisibilityConstraintId::ALWAYS_TRUE
Expand All @@ -224,17 +249,31 @@ impl<'db> VisibilityConstraints<'db> {
b
} else if b == ScopedVisibilityConstraintId::ALWAYS_TRUE {
a
} else if a == ScopedVisibilityConstraintId::ALWAYS_FALSE
|| b == ScopedVisibilityConstraintId::ALWAYS_FALSE
{
ScopedVisibilityConstraintId::ALWAYS_FALSE
} else {
self.add(VisibilityConstraint::KleeneAnd(a, b))
}
}

/// Analyze the statically known visibility for a given visibility constraint, without
/// performing any type inference.
pub(crate) fn evaluate_without_inference(
&self,
db: &'db dyn Db,
id: ScopedVisibilityConstraintId,
) -> Truthiness {
self.evaluate_impl::<false>(db, id, MAX_RECURSION_DEPTH)
}

/// Analyze the statically known visibility for a given visibility constraint.
pub(crate) fn evaluate(&self, db: &'db dyn Db, id: ScopedVisibilityConstraintId) -> Truthiness {
self.evaluate_impl(db, id, MAX_RECURSION_DEPTH)
self.evaluate_impl::<true>(db, id, MAX_RECURSION_DEPTH)
}

fn evaluate_impl(
fn evaluate_impl<const INFERENCE_ALLOWED: bool>(
&self,
db: &'db dyn Db,
id: ScopedVisibilityConstraintId,
Expand All @@ -248,18 +287,24 @@ impl<'db> VisibilityConstraints<'db> {
match visibility_constraint {
VisibilityConstraint::AlwaysTrue => Truthiness::AlwaysTrue,
VisibilityConstraint::Ambiguous => Truthiness::Ambiguous,
VisibilityConstraint::VisibleIf(constraint) => Self::analyze_single(db, constraint),
VisibilityConstraint::VisibleIfNot(negated) => {
self.evaluate_impl(db, *negated, max_depth - 1).negate()
VisibilityConstraint::VisibleIf(constraint) => {
if INFERENCE_ALLOWED {
Self::analyze_single(db, constraint)
} else {
Truthiness::Ambiguous
}
}
VisibilityConstraint::VisibleIfNot(negated) => self
.evaluate_impl::<INFERENCE_ALLOWED>(db, *negated, max_depth - 1)
.negate(),
VisibilityConstraint::KleeneAnd(lhs, rhs) => {
let lhs = self.evaluate_impl(db, *lhs, max_depth - 1);
let lhs = self.evaluate_impl::<INFERENCE_ALLOWED>(db, *lhs, max_depth - 1);

if lhs == Truthiness::AlwaysFalse {
return Truthiness::AlwaysFalse;
}

let rhs = self.evaluate_impl(db, *rhs, max_depth - 1);
let rhs = self.evaluate_impl::<INFERENCE_ALLOWED>(db, *rhs, max_depth - 1);

if rhs == Truthiness::AlwaysFalse {
Truthiness::AlwaysFalse
Expand All @@ -270,13 +315,13 @@ impl<'db> VisibilityConstraints<'db> {
}
}
VisibilityConstraint::KleeneOr(lhs_id, rhs_id) => {
let lhs = self.evaluate_impl(db, *lhs_id, max_depth - 1);
let lhs = self.evaluate_impl::<INFERENCE_ALLOWED>(db, *lhs_id, max_depth - 1);

if lhs == Truthiness::AlwaysTrue {
return Truthiness::AlwaysTrue;
}

let rhs = self.evaluate_impl(db, *rhs_id, max_depth - 1);
let rhs = self.evaluate_impl::<INFERENCE_ALLOWED>(db, *rhs_id, max_depth - 1);

if rhs == Truthiness::AlwaysTrue {
Truthiness::AlwaysTrue
Expand Down
Loading