Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[red-knot] Rework Type::to_instance() to return Option<Type> #16428

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions crates/red_knot_python_semantic/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ pub(crate) mod tests {
}
}

pub(crate) fn with_custom_typeshed(mut self, path: &'a str) -> Self {
self.custom_typeshed = Some(SystemPathBuf::from(path));
self
}

pub(crate) fn with_python_version(mut self, version: PythonVersion) -> Self {
self.python_version = version;
self
Expand Down
76 changes: 41 additions & 35 deletions crates/red_knot_python_semantic/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,7 @@ impl<'db> Type<'db> {
(Type::ClassLiteral(ClassLiteralType { class }), _) => class
.metaclass(db)
.to_instance(db)
.is_subtype_of(db, target),
.is_some_and(|instance_type| instance_type.is_subtype_of(db, target)),

// `type[str]` (== `SubclassOf("str")` in red-knot) describes all possible runtime subclasses
// of the class object `str`. It is a subtype of `type` (== `Instance("type")`) because `str`
Expand All @@ -692,11 +692,9 @@ impl<'db> Type<'db> {
(Type::SubclassOf(subclass_of_ty), _) => subclass_of_ty
.subclass_of()
.into_class()
.is_some_and(|class| {
class
.metaclass(db)
.to_instance(db)
.is_subtype_of(db, target)
.and_then(|class| class.metaclass(db).to_instance(db))
.is_some_and(|metaclass_instance_type| {
metaclass_instance_type.is_subtype_of(db, target)
}),

// For example: `Type::KnownInstance(KnownInstanceType::Type)` is a subtype of `Type::Instance(_SpecialForm)`,
Expand Down Expand Up @@ -1047,16 +1045,22 @@ impl<'db> Type<'db> {
ty.bool(db).is_always_true()
}

// for `type[Any]`/`type[Unknown]`/`type[Todo]`, we know the type cannot be any larger than `type`,
// so although the type is dynamic we can still determine disjointness in some situations
(Type::SubclassOf(subclass_of_ty), other)
| (other, Type::SubclassOf(subclass_of_ty)) => {
let metaclass_instance_ty = match subclass_of_ty.subclass_of() {
// for `type[Any]`/`type[Unknown]`/`type[Todo]`, we know the type cannot be any larger than `type`,
// so although the type is dynamic we can still determine disjointness in some situations
ClassBase::Dynamic(_) => KnownClass::Type.to_instance(db),
ClassBase::Class(class) => class.metaclass(db).to_instance(db),
};
other.is_disjoint_from(db, metaclass_instance_ty)
}
| (other, Type::SubclassOf(subclass_of_ty)) => match subclass_of_ty.subclass_of() {
ClassBase::Dynamic(_) => {
KnownClass::Type.to_instance(db).is_disjoint_from(db, other)
}
ClassBase::Class(class) => {
class
.metaclass(db)
.to_instance(db)
.is_some_and(|metaclass_instance_type| {
metaclass_instance_type.is_disjoint_from(db, other)
})
}
},

(Type::KnownInstance(known_instance), Type::Instance(InstanceType { class }))
| (Type::Instance(InstanceType { class }), Type::KnownInstance(known_instance)) => {
Expand Down Expand Up @@ -1127,7 +1131,9 @@ impl<'db> Type<'db> {
!class
.metaclass(db)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't it be an invariant that a class' metaclass is always something instantiable? It seems like it should be, since the class itself should be an instance of its metaclass.

Should we have a method that gets the instance type for a class' metaclass and handles this internally with an unreachable! instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

interesting point -- yes, I think you're right!

.to_instance(db)
.is_subtype_of(db, instance)
.is_some_and(|metaclass_instance_type| {
metaclass_instance_type.is_subtype_of(db, instance)
})
}

(Type::FunctionLiteral(..), Type::Instance(InstanceType { class }))
Expand Down Expand Up @@ -1677,18 +1683,16 @@ impl<'db> Type<'db> {
Type::Callable(_) => Truthiness::AlwaysTrue,
Type::ModuleLiteral(_) => Truthiness::AlwaysTrue,
Type::ClassLiteral(ClassLiteralType { class }) => {
return class
.metaclass(db)
.to_instance(db)
.try_bool_impl(db, allow_short_circuit);
if let Some(metaclass_instance_type) = class.metaclass(db).to_instance(db) {
metaclass_instance_type.try_bool_impl(db, allow_short_circuit)?
} else {
Truthiness::Ambiguous
}
}
Type::SubclassOf(subclass_of_ty) => match subclass_of_ty.subclass_of() {
ClassBase::Dynamic(_) => Truthiness::Ambiguous,
ClassBase::Class(class) => {
return class
.metaclass(db)
.to_instance(db)
.try_bool_impl(db, allow_short_circuit);
Type::class_literal(class).try_bool_impl(db, allow_short_circuit)?
}
},
Type::AlwaysTruthy => Truthiness::AlwaysTrue,
Expand Down Expand Up @@ -2427,17 +2431,19 @@ impl<'db> Type<'db> {
}

#[must_use]
pub fn to_instance(&self, db: &'db dyn Db) -> Type<'db> {
pub fn to_instance(&self, db: &'db dyn Db) -> Option<Type<'db>> {
match self {
Type::Dynamic(_) => *self,
Type::Never => Type::Never,
Type::ClassLiteral(ClassLiteralType { class }) => Type::instance(*class),
Type::SubclassOf(subclass_of_ty) => match subclass_of_ty.subclass_of() {
ClassBase::Class(class) => Type::instance(class),
ClassBase::Dynamic(dynamic) => Type::Dynamic(dynamic),
},
Type::Union(union) => union.map(db, |element| element.to_instance(db)),
Type::Intersection(_) => todo_type!("Type::Intersection.to_instance()"),
Type::Dynamic(_) | Type::Never => Some(*self),
Type::ClassLiteral(ClassLiteralType { class }) => Some(Type::instance(*class)),
Type::SubclassOf(subclass_of_ty) => Some(subclass_of_ty.to_instance()),
Type::Union(union) => {
let mut builder = UnionBuilder::new(db);
for element in union.elements(db) {
builder = builder.add(element.to_instance(db)?);
}
Some(builder.build())
}
Type::Intersection(_) => Some(todo_type!("Type::Intersection.to_instance()")),
// TODO: calling `.to_instance()` on any of these should result in a diagnostic,
// since they already indicate that the object is an instance of some kind:
Type::BooleanLiteral(_)
Expand All @@ -2453,7 +2459,7 @@ impl<'db> Type<'db> {
| Type::Tuple(_)
| Type::LiteralString
| Type::AlwaysTruthy
| Type::AlwaysFalsy => Type::unknown(),
| Type::AlwaysFalsy => None,
}
}

Expand Down
158 changes: 144 additions & 14 deletions crates/red_knot_python_semantic/src/types/class.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::sync::{LazyLock, Mutex};

use crate::{
module_resolver::file_to_module,
semantic_index::{
Expand All @@ -18,6 +20,7 @@ use indexmap::IndexSet;
use itertools::Itertools as _;
use ruff_db::files::File;
use ruff_python_ast::{self as ast, PythonVersion};
use rustc_hash::FxHashSet;

use super::{
class_base::ClassBase, infer_expression_type, infer_unpack_types, IntersectionBuilder,
Expand Down Expand Up @@ -876,13 +879,61 @@ impl<'db> KnownClass {
}

pub(crate) fn to_instance(self, db: &'db dyn Db) -> Type<'db> {
self.to_class_literal(db).to_instance(db)
self.to_class_literal(db)
.into_class_literal()
.map(|ClassLiteralType { class }| Type::instance(class))
.unwrap_or_else(Type::unknown)
}

pub(crate) fn try_to_class_literal(
self,
db: &'db dyn Db,
) -> Result<ClassLiteralType<'db>, KnownClassLookupError<'db>> {
let symbol = known_module_symbol(db, self.canonical_module(db), self.as_str(db));
match symbol {
Symbol::Type(Type::ClassLiteral(class_type), Boundness::Bound) => Ok(class_type),
Symbol::Type(Type::ClassLiteral(class_type), Boundness::PossiblyUnbound) => {
Err(KnownClassLookupError::ClassPossiblyUnbound { class_type })
}
Symbol::Type(found_type, _) => {
Err(KnownClassLookupError::SymbolNotAClass { found_type })
}
Symbol::Unbound => Err(KnownClassLookupError::ClassNotFound),
}
}

pub(crate) fn to_class_literal(self, db: &'db dyn Db) -> Type<'db> {
known_module_symbol(db, self.canonical_module(db), self.as_str(db))
.ignore_possibly_unbound()
.unwrap_or(Type::unknown())
// a cache of the `KnownClass`es that we have already failed to lookup in typeshed
// (and therefore that we've already logged a warning for)
static MESSAGES: LazyLock<Mutex<FxHashSet<KnownClass>>> = LazyLock::new(Mutex::default);
Comment on lines +906 to +908
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this approach was inspired by the existing Ruff macro warn_user_once_by_message!() here:

pub static MESSAGES: LazyLock<Mutex<FxHashSet<String>>> = LazyLock::new(Mutex::default);
/// Warn a user once, if warnings are enabled, with uniqueness determined by the content of the
/// message.
#[macro_export]
macro_rules! warn_user_once_by_message {
($($arg:tt)*) => {
use colored::Colorize;
use log::warn;
if let Ok(mut states) = $crate::logging::MESSAGES.lock() {
let message = format!("{}", format_args!($($arg)*));
if !states.contains(&message) {
warn!("{}", message.bold());
states.insert(message);
}
}
};
}

I couldn't find any tests for that macro, and I wasn't sure how to test this (since it deliberately works differently depending on whether the test feature is enabled, in order to prevent bugs silently going unnoticed in tests!). If anybody has any ideas, I'd be interested!


self.try_to_class_literal(db)
.map(Type::ClassLiteral)
.unwrap_or_else(|lookup_error| {
if cfg!(test) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or I suppose this could be

Suggested change
if cfg!(test) {
if cfg!(debug_assertions) {

?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or instead of being an if/else, this could simply be debug_assert!(false, "{}", lookup_error.display(db, self));

panic!("{}", lookup_error.display(db, self));
} else if MESSAGES.lock().unwrap().insert(self) {
if matches!(
lookup_error,
KnownClassLookupError::ClassPossiblyUnbound { .. }
) {
tracing::warn!("{}", lookup_error.display(db, self));
} else {
tracing::warn!(
"{}. Falling back to `Unknown` for the symbol instead.",
lookup_error.display(db, self)
);
}
Comment on lines +916 to +926
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, I'm not totally convinced that we need to / should warn on this at all. It seems natural that if you don't have a class in your custom typeshed, then any special behavior we have around that class won't occur. If someone is doing this intentionally, it could be irritating to have a non-silencable warning about it (and using a "minimalist" typeshed doesn't seem like a crazy thing for someone to do intentionally). It could be accidental, but there are tons of ways a custom typeshed could quietly break things, it seems odd to warn about this specific one.

I don't feel strongly about it, though, what you have here seems fine and we can adjust with more experience.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmmm... yeah, I originally suggested just logging it (so that the information would be available to us when debugging, but wouldn't be printed by default), but @MichaReiser suggested in #16302 (comment) that a warning might be better... I think I'll switch it to being a LOG rather than a warning, so that you only get it if you specify --verbose on the CLI

}

match lookup_error {
KnownClassLookupError::ClassPossiblyUnbound { class_type, .. } => {
Type::class_literal(class_type.class)
}
KnownClassLookupError::ClassNotFound { .. }
| KnownClassLookupError::SymbolNotAClass { .. } => Type::unknown(),
}
})
}

pub(crate) fn to_subclass_of(self, db: &'db dyn Db) -> Type<'db> {
Expand All @@ -895,10 +946,8 @@ impl<'db> KnownClass {
/// Return `true` if this symbol can be resolved to a class definition `class` in typeshed,
/// *and* `class` is a subclass of `other`.
pub(super) fn is_subclass_of(self, db: &'db dyn Db, other: Class<'db>) -> bool {
known_module_symbol(db, self.canonical_module(db), self.as_str(db))
.ignore_possibly_unbound()
.and_then(Type::into_class_literal)
.is_some_and(|ClassLiteralType { class }| class.is_subclass_of(db, other))
self.try_to_class_literal(db)
.is_ok_and(|ClassLiteralType { class }| class.is_subclass_of(db, other))
}

/// Return the module in which we should look up the definition for this class
Expand Down Expand Up @@ -931,11 +980,10 @@ impl<'db> KnownClass {
| Self::MethodWrapperType
| Self::WrapperDescriptorType => KnownModule::Types,
Self::NoneType => KnownModule::Typeshed,
Self::SpecialForm
| Self::TypeVar
| Self::TypeAliasType
| Self::StdlibAlias
| Self::SupportsIndex => KnownModule::Typing,
Self::SpecialForm | Self::TypeVar | Self::StdlibAlias | Self::SupportsIndex => {
KnownModule::Typing
}
Self::TypeAliasType => KnownModule::TypingExtensions,
Comment on lines -934 to +986
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The change here is that the canonical module for KnownClass::TypeAliasType is now KnownModule::TypingExtensions rather than KnownModule::Typing. Running the property tests on this branch instantly crashed without this change, as it was revealed that until now KnownClass::TypeAliasType.to_instance(db) has been silently resolving to Type::Unknown all this time, due to the fact that it was only added to the typing module in Python 3.12 but our default Python version is 3.9.

Self::NoDefaultType => {
let python_version = Program::get(db).python_version(db);

Expand Down Expand Up @@ -1164,6 +1212,57 @@ impl<'db> KnownClass {
}
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum KnownClassLookupError<'db> {
ClassNotFound,
SymbolNotAClass { found_type: Type<'db> },
ClassPossiblyUnbound { class_type: ClassLiteralType<'db> },
}

impl<'db> KnownClassLookupError<'db> {
fn display(&self, db: &'db dyn Db, class: KnownClass) -> impl std::fmt::Display + 'db {
struct ErrorDisplay<'db> {
db: &'db dyn Db,
class: KnownClass,
error: KnownClassLookupError<'db>,
}

impl std::fmt::Display for ErrorDisplay<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let ErrorDisplay { db, class, error } = *self;

let module = class.canonical_module(db).as_str();
let class = class.as_str(db);
let python_version = Program::get(db).python_version(db);

match error {
KnownClassLookupError::ClassNotFound => write!(
f,
"Could not find class `{module}.{class}` in typeshed on Python {python_version}",
),
KnownClassLookupError::SymbolNotAClass { found_type } => write!(
f,
"Error looking up `{module}.{class}` in typeshed: expected to find a class definition \
on Python {python_version}, but found a symbol of type `{found_type}` instead",
found_type = found_type.display(db),
),
KnownClassLookupError::ClassPossiblyUnbound { .. } => write!(
f,
"Error looking up `{module}.{class}` in typeshed on Python {python_version}: \
expected to find a fully bound symbol, but found one that is possibly unbound",
)
}
}
}

ErrorDisplay {
db,
class,
error: *self,
}
}
}

/// Enumeration of specific runtime that are special enough to be considered their own type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, salsa::Update)]
pub enum KnownInstanceType<'db> {
Expand Down Expand Up @@ -1539,7 +1638,7 @@ pub(super) enum MetaclassErrorKind<'db> {
#[cfg(test)]
mod tests {
use super::*;
use crate::db::tests::setup_db;
use crate::db::tests::{setup_db, TestDb, TestDbBuilder};
use crate::module_resolver::resolve_module;
use strum::IntoEnumIterator;

Expand All @@ -1557,4 +1656,35 @@ mod tests {
);
}
}

fn setup_db_with_broken_typeshed(builtins_file: &str) -> TestDb {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this is a wording nit, sort of related to my "not sure we need to warn" comment above -- but I slightly object to the framing of this as a "broken" typeshed. I think we should aim to consider typesheds where certain symbols aren't present or aren't what we expect to still be "working", where some behaviors will just fall back to gradual typing. Maybe it doesn't really matter, but I'd prefer we don't accept the idea that it's OK for things to be "broken" because a minimal typeshed is used.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough. FWIW, we do panic (Salsa cycles etc.) if there's no object class in builtins; we did before this PR, and will continue to with this PR. I'm not sure I have the appetite to fix that "bug" :P

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's salsa cycles, I expect the panics will go away with fixpoint iteration? Will check! Not having object basically means everything is Unknown (because everything inherits Unknown)...

TestDbBuilder::new()
.with_custom_typeshed("/typeshed")
.with_file("/typeshed/stdlib/builtins.pyi", builtins_file)
.with_file("/typeshed/stdlib/types.pyi", "class ModuleType: ...")
.with_file("/typeshed/stdlib/VERSIONS", "builtins: 3.8-\ntypes: 3.8-")
.build()
.unwrap()
}

#[test]
#[should_panic(expected = "Could not find class `builtins.int` in typeshed")]
fn known_class_to_class_literal_panics_with_test_feature_enabled() {
let db = setup_db_with_broken_typeshed("class object: ...");
KnownClass::Int.to_class_literal(&db);
}

#[test]
#[should_panic(expected = "Could not find class `builtins.int` in typeshed")]
fn known_class_to_instance_panics_with_test_feature_enabled() {
let db = setup_db_with_broken_typeshed("class object: ...");
KnownClass::Int.to_instance(&db);
}

#[test]
#[should_panic(expected = "found a symbol of type `Unknown | Literal[42]` instead")]
fn known_class_to_subclass_of_panics_with_test_feature_enabled() {
let db = setup_db_with_broken_typeshed("int = 42");
KnownClass::Int.to_subclass_of(&db);
}
}
12 changes: 9 additions & 3 deletions crates/red_knot_python_semantic/src/types/infer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1751,7 +1751,10 @@ impl<'db> TypeInferenceBuilder<'db> {
for element in tuple.elements(self.db()).iter().copied() {
builder = builder.add(
if element.is_assignable_to(self.db(), type_base_exception) {
element.to_instance(self.db())
element.to_instance(self.db()).expect(
"`Type::to_instance()` should always return `Some()` \
if called on a type assignable to `type[BaseException]`",
)
} else {
if let Some(node) = node {
report_invalid_exception_caught(&self.context, node, element);
Expand All @@ -1766,7 +1769,10 @@ impl<'db> TypeInferenceBuilder<'db> {
} else {
let type_base_exception = KnownClass::BaseException.to_subclass_of(self.db());
if node_ty.is_assignable_to(self.db(), type_base_exception) {
node_ty.to_instance(self.db())
node_ty.to_instance(self.db()).expect(
"`Type::to_instance()` should always return `Some()` \
if called on a type assignable to `type[BaseException]`",
)
} else {
if let Some(node) = node {
report_invalid_exception_caught(&self.context, node, node_ty);
Expand Down Expand Up @@ -2535,7 +2541,7 @@ impl<'db> TypeInferenceBuilder<'db> {
} = raise;

let base_exception_type = KnownClass::BaseException.to_subclass_of(self.db());
let base_exception_instance = base_exception_type.to_instance(self.db());
let base_exception_instance = KnownClass::BaseException.to_instance(self.db());

let can_be_raised =
UnionType::from_elements(self.db(), [base_exception_type, base_exception_instance]);
Expand Down
Loading
Loading