Skip to content

Commit

Permalink
red-knot: Don't refer to Module instances as IDs (#11649)
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexWaygood authored May 31, 2024
1 parent 16a926d commit d62a617
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 40 deletions.
89 changes: 50 additions & 39 deletions crates/red_knot/src/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ use crate::files::FileId;
use crate::symbols::Dependency;
use crate::FxDashMap;

/// ID uniquely identifying a module.
/// Representation of a Python module.
///
/// The inner type wrapped by this struct is a unique identifier for the module
/// that is used by the struct's methods to lazily query information about the module.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct Module(u32);

Expand Down Expand Up @@ -100,7 +103,8 @@ impl Module {

/// A module name, e.g. `foo.bar`.
///
/// Always normalized to the absolute form (never a relative module name).
/// Always normalized to the absolute form
/// (never a relative module name, i.e., never `.foo`).
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct ModuleName(smol_str::SmolStr);

Expand Down Expand Up @@ -250,9 +254,11 @@ pub struct ModuleData {
// Queries
//////////////////////////////////////////////////////

/// Resolves a module name to a module id
/// TODO: This would not work with Salsa because `ModuleName` isn't an ingredient and, therefore, cannot be used as part of a query.
/// For this to work with salsa, it would be necessary to intern all `ModuleName`s.
/// Resolves a module name to a module.
///
/// TODO: This would not work with Salsa because `ModuleName` isn't an ingredient
/// and, therefore, cannot be used as part of a query.
/// For this to work with salsa, it would be necessary to intern all `ModuleName`s.
#[tracing::instrument(level = "debug", skip(db))]
pub fn resolve_module(db: &dyn SemanticDb, name: ModuleName) -> QueryResult<Option<Module>> {
let jar: &SemanticJar = db.jar()?;
Expand All @@ -274,15 +280,15 @@ pub fn resolve_module(db: &dyn SemanticDb, name: ModuleName) -> QueryResult<Opti
let file_id = db.file_id(&normalized);
let path = ModulePath::new(root_path.clone(), file_id);

let id = Module(
let module = Module(
modules
.next_module_id
.fetch_add(1, std::sync::atomic::Ordering::Relaxed),
);

modules
.modules
.insert(id, Arc::from(ModuleData { name, path, kind }));
.insert(module, Arc::from(ModuleData { name, path, kind }));

// A path can map to multiple modules because of symlinks:
// ```
Expand All @@ -291,33 +297,33 @@ pub fn resolve_module(db: &dyn SemanticDb, name: ModuleName) -> QueryResult<Opti
// ```
// Here, both `foo` and `bar` resolve to the same module but through different paths.
// That's why we need to insert the absolute path and not the normalized path here.
let absolute_id = if absolute_path == normalized {
let absolute_file_id = if absolute_path == normalized {
file_id
} else {
db.file_id(&absolute_path)
};

modules.by_file.insert(absolute_id, id);
modules.by_file.insert(absolute_file_id, module);

entry.insert_entry(id);
entry.insert_entry(module);

Ok(Some(id))
Ok(Some(module))
}
}
}

/// Resolves the module id for the given path.
/// Resolves the module for the given path.
///
/// Returns `None` if the path is not a module in `sys.path`.
/// Returns `None` if the path is not a module locatable via `sys.path`.
#[tracing::instrument(level = "debug", skip(db))]
pub fn path_to_module(db: &dyn SemanticDb, path: &Path) -> QueryResult<Option<Module>> {
let file = db.file_id(path);
file_to_module(db, file)
}

/// Resolves the module id for the file with the given id.
/// Resolves the module for the file with the given id.
///
/// Returns `None` if the file is not a module in `sys.path`.
/// Returns `None` if the file is not a module locatable via `sys.path`.
#[tracing::instrument(level = "debug", skip(db))]
pub fn file_to_module(db: &dyn SemanticDb, file: FileId) -> QueryResult<Option<Module>> {
let jar: &SemanticJar = db.jar()?;
Expand All @@ -344,12 +350,12 @@ pub fn file_to_module(db: &dyn SemanticDb, file: FileId) -> QueryResult<Option<M

// Resolve the module name to see if Python would resolve the name to the same path.
// If it doesn't, then that means that multiple modules have the same in different
// root paths, but that the module corresponding to the past path is in a lower priority path,
// root paths, but that the module corresponding to the past path is in a lower priority search path,
// in which case we ignore it.
let Some(module_id) = resolve_module(db, module_name)? else {
let Some(module) = resolve_module(db, module_name)? else {
return Ok(None);
};
let module_path = module_id.path(db)?;
let module_path = module.path(db)?;

if module_path.root() == &root_path {
let Ok(normalized) = path.canonicalize() else {
Expand All @@ -369,7 +375,7 @@ pub fn file_to_module(db: &dyn SemanticDb, file: FileId) -> QueryResult<Option<M
}

// Path has been inserted by `resolved`
Ok(Some(module_id))
Ok(Some(module))
} else {
// This path is for a module with the same name but in a module search path with a lower priority.
// Ignore it.
Expand All @@ -388,19 +394,22 @@ pub fn set_module_search_paths(db: &mut dyn SemanticDb, search_paths: Vec<Module
jar.module_resolver = ModuleResolver::new(search_paths);
}

/// Adds a module to the resolver.
/// Adds a module located at `path` to the resolver.
///
/// Returns `None` if the path doesn't resolve to a module.
///
/// Returns `Some` with the id of the module and the ids of the modules that need re-resolving
/// because they were part of a namespace package and might now resolve differently.
/// Returns `Some(module, other_modules)`, where `module` is the resolved module
/// with file location `path`, and `other_modules` is a `Vec` of `ModuleData` instances.
/// Each element in `other_modules` provides information regarding a single module that needs
/// re-resolving because it was part of a namespace package and might now resolve differently.
///
/// Note: This won't work with salsa because `Path` is not an ingredient.
pub fn add_module(db: &mut dyn SemanticDb, path: &Path) -> Option<(Module, Vec<Arc<ModuleData>>)> {
// No locking is required because we're holding a mutable reference to `modules`.

// TODO This needs tests

// Note: Intentionally by-pass caching here. Module should not be in the cache yet.
// Note: Intentionally bypass caching here. Module should not be in the cache yet.
let module = path_to_module(db, path).ok()??;

// The code below is to handle the addition of `__init__.py` files.
Expand All @@ -424,15 +433,15 @@ pub fn add_module(db: &mut dyn SemanticDb, path: &Path) -> Option<(Module, Vec<A
let jar: &mut SemanticJar = db.jar_mut();
let modules = &mut jar.module_resolver;

modules.by_file.retain(|_, id| {
modules.by_file.retain(|_, module| {
if modules
.modules
.get(id)
.get(module)
.unwrap()
.name
.starts_with(&parent_name)
{
to_remove.push(*id);
to_remove.push(*module);
false
} else {
true
Expand All @@ -441,8 +450,8 @@ pub fn add_module(db: &mut dyn SemanticDb, path: &Path) -> Option<(Module, Vec<A

// TODO remove need for this vec
let mut removed = Vec::with_capacity(to_remove.len());
for id in &to_remove {
removed.push(modules.remove_module_by_id(*id));
for module in &to_remove {
removed.push(modules.remove_module(*module));
}

Some((module, removed))
Expand All @@ -455,10 +464,10 @@ pub struct ModuleResolver {

// Locking: Locking is done by acquiring a (write) lock on `by_name`. This is because `by_name` is the primary
// lookup method. Acquiring locks in any other ordering can result in deadlocks.
/// Resolves a module name to it's module id.
/// Looks up a module by name
by_name: FxDashMap<ModuleName, Module>,

/// All known modules, indexed by the module id.
/// A map of all known modules to data about those modules
modules: FxDashMap<Module, Arc<ModuleData>>,

/// Lookup from absolute path to module.
Expand All @@ -479,24 +488,26 @@ impl ModuleResolver {
}

/// Remove a module from the inner cache
pub(crate) fn remove_module(&mut self, file_id: FileId) {
pub(crate) fn remove_module_by_file(&mut self, file_id: FileId) {
// No locking is required because we're holding a mutable reference to `self`.
let Some((_, id)) = self.by_file.remove(&file_id) else {
let Some((_, module)) = self.by_file.remove(&file_id) else {
return;
};

self.remove_module_by_id(id);
self.remove_module(module);
}

fn remove_module_by_id(&mut self, id: Module) -> Arc<ModuleData> {
let (_, module) = self.modules.remove(&id).unwrap();
fn remove_module(&mut self, module: Module) -> Arc<ModuleData> {
let (_, module_data) = self.modules.remove(&module).unwrap();

self.by_name.remove(&module.name).unwrap();
self.by_name.remove(&module_data.name).unwrap();

// It's possible that multiple paths map to the same id. Search all other paths referencing the same module id.
self.by_file.retain(|_, current_id| *current_id != id);
// It's possible that multiple paths map to the same module.
// Search all other paths referencing the same module.
self.by_file
.retain(|_, current_module| *current_module != module);

module
module_data
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/red_knot/src/program/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ impl Program {

let (source, semantic, lint) = self.jars_mut();
for change in aggregated_changes.iter() {
semantic.module_resolver.remove_module(change.id);
semantic.module_resolver.remove_module_by_file(change.id);
semantic.symbol_tables.remove(&change.id);
source.sources.remove(&change.id);
source.parsed.remove(&change.id);
Expand Down

0 comments on commit d62a617

Please sign in to comment.