From b0905c4b043189879af78afb757270c4e0397e25 Mon Sep 17 00:00:00 2001 From: Dylan <53534755+dylwil3@users.noreply.github.com> Date: Thu, 9 Jan 2025 10:50:39 -0600 Subject: [PATCH] [`pycodestyle`] Handle each cell separately for `too-many-newlines-at-end-of-file` (`W391`) (#15308) Jupyter notebooks are converted into source files by joining with newlines, which confuses the check [too-many-newlines-at-end-of-file (W391)](https://docs.astral.sh/ruff/rules/too-many-newlines-at-end-of-file/#too-many-newlines-at-end-of-file-w391). This PR introduces logic to apply the check cell-wise (and, in particular, correctly handles empty cells.) Closes #13763 --- .../test/fixtures/pycodestyle/W391.ipynb | 92 ++++++++++++++ crates/ruff_linter/src/checkers/tokens.rs | 6 +- .../ruff_linter/src/rules/pycodestyle/mod.rs | 1 + .../rules/too_many_newlines_at_end_of_file.rs | 119 +++++++++++++----- ...tyle__tests__preview__W391_W391.ipynb.snap | 74 +++++++++++ 5 files changed, 261 insertions(+), 31 deletions(-) create mode 100644 crates/ruff_linter/resources/test/fixtures/pycodestyle/W391.ipynb create mode 100644 crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__preview__W391_W391.ipynb.snap diff --git a/crates/ruff_linter/resources/test/fixtures/pycodestyle/W391.ipynb b/crates/ruff_linter/resources/test/fixtures/pycodestyle/W391.ipynb new file mode 100644 index 0000000000000..a1d87cd893818 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/pycodestyle/W391.ipynb @@ -0,0 +1,92 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# just a comment in this cell" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# a comment and some newlines\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "1 + 1\n", + "# a comment\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "1+1\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index 1aa85c485663b..cb0eecfad1d5f 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -183,7 +183,11 @@ pub(crate) fn check_tokens( } if settings.rules.enabled(Rule::TooManyNewlinesAtEndOfFile) { - pycodestyle::rules::too_many_newlines_at_end_of_file(&mut diagnostics, tokens); + pycodestyle::rules::too_many_newlines_at_end_of_file( + &mut diagnostics, + tokens, + cell_offsets, + ); } diagnostics.retain(|diagnostic| settings.rules.enabled(diagnostic.kind.rule())); diff --git a/crates/ruff_linter/src/rules/pycodestyle/mod.rs b/crates/ruff_linter/src/rules/pycodestyle/mod.rs index 2a63f0fd26da2..9c55fedb07864 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/mod.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/mod.rs @@ -79,6 +79,7 @@ mod tests { #[test_case(Rule::TooManyNewlinesAtEndOfFile, Path::new("W391_2.py"))] #[test_case(Rule::TooManyNewlinesAtEndOfFile, Path::new("W391_3.py"))] #[test_case(Rule::TooManyNewlinesAtEndOfFile, Path::new("W391_4.py"))] + #[test_case(Rule::TooManyNewlinesAtEndOfFile, Path::new("W391.ipynb"))] fn preview_rules(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!( "preview__{}_{}", diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs index ae2af5e9c328b..d05d340e087a8 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs @@ -1,11 +1,18 @@ +use std::iter::Peekable; + +use itertools::Itertools; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, ViolationMetadata}; -use ruff_python_parser::{TokenKind, Tokens}; +use ruff_notebook::CellOffsets; +use ruff_python_parser::{Token, TokenKind, Tokens}; use ruff_text_size::{Ranged, TextRange, TextSize}; /// ## What it does /// Checks for files with multiple trailing blank lines. /// +/// In the case of notebooks, this check is applied to +/// each cell separately. +/// /// ## Why is this bad? /// Trailing blank lines in a file are superfluous. /// @@ -23,17 +30,19 @@ use ruff_text_size::{Ranged, TextRange, TextSize}; #[derive(ViolationMetadata)] pub(crate) struct TooManyNewlinesAtEndOfFile { num_trailing_newlines: u32, + in_notebook: bool, } impl AlwaysFixableViolation for TooManyNewlinesAtEndOfFile { #[derive_message_formats] fn message(&self) -> String { + let domain = if self.in_notebook { "cell" } else { "file" }; // We expect a single trailing newline; so two trailing newlines is one too many, three // trailing newlines is two too many, etc. if self.num_trailing_newlines > 2 { - "Too many newlines at end of file".to_string() + format!("Too many newlines at end of {domain}") } else { - "Extra newline at end of file".to_string() + format!("Extra newline at end of {domain}") } } @@ -48,22 +57,68 @@ impl AlwaysFixableViolation for TooManyNewlinesAtEndOfFile { } /// W391 -pub(crate) fn too_many_newlines_at_end_of_file(diagnostics: &mut Vec, tokens: &Tokens) { - let mut num_trailing_newlines = 0u32; - let mut start: Option = None; - let mut end: Option = None; - - // Count the number of trailing newlines. - for token in tokens.iter().rev() { - match token.kind() { - TokenKind::NonLogicalNewline | TokenKind::Newline => { - if num_trailing_newlines == 0 { - end = Some(token.end()); +pub(crate) fn too_many_newlines_at_end_of_file( + diagnostics: &mut Vec, + tokens: &Tokens, + cell_offsets: Option<&CellOffsets>, +) { + let mut tokens_iter = tokens.iter().rev().peekable(); + + if let Some(cell_offsets) = cell_offsets { + diagnostics.extend(notebook_newline_diagnostics(tokens_iter, cell_offsets)); + } else if let Some(diagnostic) = newline_diagnostic(&mut tokens_iter, false) { + diagnostics.push(diagnostic); + }; +} + +/// Collects trailing newline diagnostics for each cell +fn notebook_newline_diagnostics<'a>( + mut tokens_iter: Peekable>, + cell_offsets: &CellOffsets, +) -> Vec { + let mut results = Vec::new(); + let offset_iter = cell_offsets.iter().rev(); + + // NB: When interpreting the below, recall that the iterators + // have been reversed. + for &offset in offset_iter { + // Advance to offset + tokens_iter + .peeking_take_while(|tok| tok.end() >= offset) + .for_each(drop); + + let Some(diagnostic) = newline_diagnostic(&mut tokens_iter, true) else { + continue; + }; + + results.push(diagnostic); + } + results +} + +/// Possible diagnostic, with fix, for too many newlines in cell or source file +fn newline_diagnostic<'a>( + tokens_iter: &mut Peekable>, + in_notebook: bool, +) -> Option { + let mut num_trailing_newlines: u32 = 0; + let mut newline_range_start: Option = None; + let mut newline_range_end: Option = None; + + while let Some(next_token) = tokens_iter.peek() { + match next_token.kind() { + TokenKind::Newline | TokenKind::NonLogicalNewline => { + if newline_range_end.is_none() { + newline_range_end = Some(next_token.end()); } - start = Some(token.end()); + newline_range_start = Some(next_token.end()); + + tokens_iter.next(); num_trailing_newlines += 1; } - TokenKind::Dedent => continue, + TokenKind::Dedent => { + tokens_iter.next(); + } _ => { break; } @@ -71,19 +126,23 @@ pub(crate) fn too_many_newlines_at_end_of_file(diagnostics: &mut Vec } if num_trailing_newlines == 0 || num_trailing_newlines == 1 { - return; - } - - let range = match (start, end) { - (Some(start), Some(end)) => TextRange::new(start, end), - _ => return, + return None; }; - let mut diagnostic = Diagnostic::new( - TooManyNewlinesAtEndOfFile { - num_trailing_newlines, - }, - range, - ); - diagnostic.set_fix(Fix::safe_edit(Edit::range_deletion(range))); - diagnostics.push(diagnostic); + + let (start, end) = (match (newline_range_start, newline_range_end) { + (Some(s), Some(e)) => Some((s, e)), + _ => None, + })?; + + let diagnostic_range = TextRange::new(start, end); + Some( + Diagnostic::new( + TooManyNewlinesAtEndOfFile { + num_trailing_newlines, + in_notebook, + }, + diagnostic_range, + ) + .with_fix(Fix::safe_edit(Edit::range_deletion(diagnostic_range))), + ) } diff --git a/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__preview__W391_W391.ipynb.snap b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__preview__W391_W391.ipynb.snap new file mode 100644 index 0000000000000..94ec2ac5c16ce --- /dev/null +++ b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__preview__W391_W391.ipynb.snap @@ -0,0 +1,74 @@ +--- +source: crates/ruff_linter/src/rules/pycodestyle/mod.rs +--- +W391.ipynb:5:1: W391 [*] Too many newlines at end of cell + | + 3 | # just a comment in this cell + 4 | # a comment and some newlines + 5 | / + 6 | | + 7 | | + 8 | | + | |_^ W391 + 9 | 1 + 1 +10 | # a comment + | + = help: Remove trailing newlines + +ℹ Safe fix +3 3 | # just a comment in this cell +4 4 | # a comment and some newlines +5 5 | +6 |- +7 |- +8 |- +9 6 | 1 + 1 +10 7 | # a comment +11 8 | + +W391.ipynb:11:1: W391 [*] Too many newlines at end of cell + | + 9 | 1 + 1 +10 | # a comment +11 | / +12 | | +13 | | +14 | | +15 | | + | |_^ W391 +16 | 1+1 + | + = help: Remove trailing newlines + +ℹ Safe fix +9 9 | 1 + 1 +10 10 | # a comment +11 11 | +12 |- +13 |- +14 |- +15 |- +16 12 | 1+1 +17 13 | +18 14 | + +W391.ipynb:17:1: W391 [*] Too many newlines at end of cell + | +16 | 1+1 +17 | / +18 | | +19 | | +20 | | +21 | | + | |_^ W391 + | + = help: Remove trailing newlines + +ℹ Safe fix +15 15 | +16 16 | 1+1 +17 17 | +18 |- +19 |- +20 |- +21 |-