From 4b3a413ebf06e2310e443e471f8fc076d61a8898 Mon Sep 17 00:00:00 2001 From: David Lukes Date: Fri, 16 Feb 2018 23:21:57 +0100 Subject: [PATCH] Attempt at checking for license (#209) I'm not quite sure how best to handle loading the license template from a path -- I mean obviously I know *how* to do it, but I'm not sure where to fit it in the codebase :) So this first attempt puts the license template directly into the config file. These are my misgivings about the license template config option as a path to a file (I'd love feedback if some of these are wrong or can be easily circumvented!): 1. I thought the obvious choice for the type of `license_template` in `create_config!` should be `PathBuf`, but `PathBuf` doesn't implement `FromStr` (yet? see https://github.com/rust-lang/rust/issues/44431), so it would have to be wrapped in a tuple struct, and I went down that road for a little while but then it seemed like too much ceremony for too little gain. 2. So a plain `String` then (which, mind you, also means the same `doc_hint()`, i.e. ``, not `` or something like that). The fact that it's a valid path will be checked once we try to read the file. 3. But where in the code should the license template be read? The obvious choice for me would be somewhere in `Config::from_toml()`, but since `Config` is defined via the `create_config!` macro, that would mean tight coupling between the macro invocation (which defines the configuration option `license_template`) and its definition (which would rely on the existence of that option to run the template loading code). 4. `license_template` could also be made a special option which is hardwired into the macro. This gets rid of the tight coupling, but special-casing one of the config options would make the code harder to navigate. 5. Instead, the macro could maybe be rewritten to allow for config options that load additional resources from files when the config is being parsed, but that's beyond my skill level I'm afraid (and probably overengineering the problem if it's only ever going to be used for this one option). 6. Finally, the file can be loaded at some later point in time, e.g. in `format_lines()`, right before `check_license()` is called. But to face a potential *IO* error at so late a stage, when the source files have already been parsed... I don't know, it doesn't feel right. BTW I don't like that I'm actually parsing the license template as late as inside `check_license()` either, but for much the same reasons, I don't know where else to put it. If the `Config` were hand-rolled instead of a macro, I'd just define a custom `license_template` option and load and parse the template in the `Config`'s init. But the way things are, I'm a bit at a loss. However, if someone more familiar with the project would kindly provide a few hints as to how the path approach can be done in a way that is as clean as possible in the context of the codebase, I'll be more than happy to implement it! :) --- rustfmt-config/src/lib.rs | 1 + rustfmt-core/src/lib.rs | 110 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 107 insertions(+), 4 deletions(-) diff --git a/rustfmt-config/src/lib.rs b/rustfmt-config/src/lib.rs index 3b6dca769d9..aa1c6431353 100644 --- a/rustfmt-config/src/lib.rs +++ b/rustfmt-config/src/lib.rs @@ -57,6 +57,7 @@ create_config! { comment_width: usize, 80, false, "Maximum length of comments. No effect unless wrap_comments = true"; normalize_comments: bool, false, true, "Convert /* */ comments to // comments where possible"; + license_template: String, String::default(), false, "Check for license"; // Single line expressions and items. empty_item_single_line: bool, true, false, diff --git a/rustfmt-core/src/lib.rs b/rustfmt-core/src/lib.rs index 3e82df9088c..cd566d3e091 100644 --- a/rustfmt-core/src/lib.rs +++ b/rustfmt-core/src/lib.rs @@ -39,6 +39,7 @@ use syntax::ast; use syntax::codemap::{CodeMap, FilePathMapping}; pub use syntax::codemap::FileName; use syntax::parse::{self, ParseSess}; +use regex::{Regex, RegexBuilder}; use checkstyle::{output_footer, output_header}; use comment::{CharClasses, FullCodeCharKind}; @@ -91,6 +92,10 @@ pub enum ErrorKind { TrailingWhitespace, // TO-DO or FIX-ME item without an issue number BadIssue(Issue), + // License check has failed + LicenseCheck, + // License template could not be parsed + ParsingLicense, } impl fmt::Display for ErrorKind { @@ -103,6 +108,8 @@ impl fmt::Display for ErrorKind { ), ErrorKind::TrailingWhitespace => write!(fmt, "left behind trailing whitespace"), ErrorKind::BadIssue(issue) => write!(fmt, "found {}", issue), + ErrorKind::LicenseCheck => write!(fmt, "license check failed"), + ErrorKind::ParsingLicense => write!(fmt, "parsing regex in license template failed"), } } } @@ -119,7 +126,11 @@ pub struct FormattingError { impl FormattingError { fn msg_prefix(&self) -> &str { match self.kind { - ErrorKind::LineOverflow(..) | ErrorKind::TrailingWhitespace => "error:", + ErrorKind::LineOverflow(..) + | ErrorKind::TrailingWhitespace + | ErrorKind::LicenseCheck => "error:", + | + ErrorKind::ParsingLicense => "error:", ErrorKind::BadIssue(_) => "WARNING:", } } @@ -397,8 +408,39 @@ fn should_report_error( } } +fn check_license(text: &str, license_template: &str) -> Result { + let mut template_re = String::from("^"); + // the template is parsed as a series of pairs of capture groups of (1) lazy whatever, which + // will be matched literally, followed by (2) a {}-delimited block, which will be matched as a + // regex + let template_parser = RegexBuilder::new(r"(.*?)\{(.*?)\}") + .dot_matches_new_line(true) + .build() + .unwrap(); + // keep track of the last matched offset and ultimately append the tail of the template (if any) + // after the last {} block + let mut last_matched_offset = 0; + for caps in template_parser.captures_iter(license_template) { + if let Some(mat) = caps.get(0) { + last_matched_offset = mat.end() + } + if let Some(mat) = caps.get(1) { + template_re.push_str(®ex::escape(mat.as_str())) + } + if let Some(mat) = caps.get(2) { + let mut re = mat.as_str(); + if re.is_empty() { + re = ".*?"; + } + template_re.push_str(re) + } + } + template_re.push_str(®ex::escape(&license_template[last_matched_offset..])); + let template_re = Regex::new(&template_re)?; + Ok(template_re.is_match(text)) +} + // Formatting done on a char by char or line by line basis. -// FIXME(#209) warn on bad license // FIXME(#20) other stuff for parity with make tidy fn format_lines( text: &mut String, @@ -407,7 +449,6 @@ fn format_lines( config: &Config, report: &mut FormatReport, ) { - // Iterate over the chars in the file map. let mut trims = vec![]; let mut last_wspace: Option = None; let mut line_len = 0; @@ -419,6 +460,33 @@ fn format_lines( let mut is_string = false; // true if the current line contains a string literal. let mut format_line = config.file_lines().contains_line(name, cur_line); + // Check license. + if config.was_set().license_template() { + match check_license(text, &config.license_template()) { + Ok(check) => { + if !check { + errors.push(FormattingError { + line: cur_line, + kind: ErrorKind::LicenseCheck, + is_comment: false, + is_string: false, + line_buffer: String::new(), + }); + } + } + Err(_) => { + errors.push(FormattingError { + line: cur_line, + kind: ErrorKind::ParsingLicense, + is_comment: false, + is_string: false, + line_buffer: String::new(), + }); + } + } + } + + // Iterate over the chars in the file map. for (kind, (b, c)) in CharClasses::new(text.chars().enumerate()) { if c == '\r' { continue; @@ -844,7 +912,7 @@ pub fn run(input: Input, config: &Config) -> Summary { #[cfg(test)] mod test { - use super::{format_code_block, format_snippet, Config}; + use super::{check_license, format_code_block, format_snippet, Config}; #[test] fn test_no_panic_on_format_snippet_and_format_code_block() { @@ -930,4 +998,38 @@ false, };"; assert!(test_format_inner(format_code_block, code_block, expected)); } + + #[test] + fn test_check_license() { + assert!(check_license("literal matching", "literal matching").unwrap()); + assert!(!check_license("literal no match", "literal matching").unwrap()); + assert!( + check_license( + "Regex start and end: 2018", + r"{[Rr]egex} start {} end: {\d+}" + ).unwrap() + ); + assert!(!check_license( + "Regex start and end no match: 2018", + r"{[Rr]egex} start {} end: {\d+}" + ).unwrap()); + assert!( + check_license( + "Regex in the middle: 2018 (tm)", + r"Regex {} middle: {\d+} (tm)" + ).unwrap() + ); + assert!(!check_license( + "Regex in the middle no match: 2018 (tm)", + r"Regex {} middle: {\d+} (tm)" + ).unwrap()); + assert!(!check_license("default doesn't match\nacross lines", "default {} lines").unwrap()); + assert!(check_license("", "this is not a valid {[regex}").is_err()); + assert!( + check_license( + "can't parse nested delimiters with regex", + r"can't parse nested delimiters with regex{\.{3}}" + ).is_err() + ); + } }