From d79add341ba4be10bb3459877318b9c5a30f5db3 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Tue, 11 Oct 2016 19:57:09 -0400 Subject: [PATCH] Move all gitignore matching to separate crate. This PR introduces a new sub-crate, `ignore`, which primarily provides a fast recursive directory iterator that respects ignore files like gitignore and other configurable filtering rules based on globs or even file types. This results in a substantial source of complexity moved out of ripgrep's core and into a reusable component that others can now (hopefully) benefit from. While much of the ignore code carried over from ripgrep's core, a substantial portion of it was rewritten with the following goals in mind: 1. Reuse matchers built from gitignore files across directory iteration. 2. Design the matcher data structure to be amenable for parallelizing directory iteration. (Indeed, writing the parallel iterator is the next step.) Fixes #9, #44, #45 --- Cargo.lock | 57 +-- Cargo.toml | 6 +- appveyor.yml | 8 +- ci/script.sh | 2 + globset/Cargo.toml | 3 + globset/benches/bench.rs | 3 + globset/src/lib.rs | 51 ++- globset/src/pathutil.rs | 6 +- grep/Cargo.toml | 2 +- ignore/Cargo.lock | 170 +++++++++ ignore/Cargo.toml | 36 ++ ignore/README.md | 66 ++++ ignore/examples/walk.rs | 28 ++ ignore/src/dir.rs | 803 +++++++++++++++++++++++++++++++++++++++ ignore/src/gitignore.rs | 607 +++++++++++++++++++++++++++++ ignore/src/lib.rs | 300 +++++++++++++++ ignore/src/overrides.rs | 202 ++++++++++ ignore/src/pathutil.rs | 108 ++++++ ignore/src/types.rs | 568 +++++++++++++++++++++++++++ ignore/src/walk.rs | 592 +++++++++++++++++++++++++++++ src/args.rs | 105 +++-- src/gitignore.rs | 455 ---------------------- src/ignore.rs | 493 ------------------------ src/main.rs | 90 ++--- src/pathutil.rs | 78 +--- src/printer.rs | 6 +- src/terminal.rs | 0 src/types.rs | 458 ---------------------- src/walk.rs | 140 ------- tests/tests.rs | 82 ++++ 30 files changed, 3765 insertions(+), 1760 deletions(-) 
create mode 100644 ignore/Cargo.lock create mode 100644 ignore/Cargo.toml create mode 100644 ignore/README.md create mode 100644 ignore/examples/walk.rs create mode 100644 ignore/src/dir.rs create mode 100644 ignore/src/gitignore.rs create mode 100644 ignore/src/lib.rs create mode 100644 ignore/src/overrides.rs create mode 100644 ignore/src/pathutil.rs create mode 100644 ignore/src/types.rs create mode 100644 ignore/src/walk.rs delete mode 100644 src/gitignore.rs delete mode 100644 src/ignore.rs delete mode 100644 src/terminal.rs delete mode 100644 src/types.rs delete mode 100644 src/walk.rs diff --git a/Cargo.lock b/Cargo.lock index ba88e2cb4..b10e0602e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,20 +5,18 @@ dependencies = [ "deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", - "globset 0.1.0", "grep 0.1.3", + "ignore 0.1.0", "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", - "memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", "term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", - "walkdir 0.1.8 
(registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -44,7 +42,7 @@ version = "0.6.86" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", "strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -55,7 +53,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -65,7 +63,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "fs2" -version = "0.2.5" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -82,7 +80,7 @@ dependencies = [ "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -91,9 +89,22 @@ version = "0.1.3" dependencies = [ "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", - "memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.1.77 
(registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", + "memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ignore" +version = "0.1.0" +dependencies = [ + "globset 0.1.0", + "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -130,10 +141,10 @@ dependencies = [ [[package]] name = "memmap" -version = "0.2.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", + "fs2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", @@ -157,12 +168,12 @@ dependencies = [ [[package]] name = "regex" -version = "0.1.77" +version = "0.1.80" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", "simd 0.1.1 
(registry+https://github.com/rust-lang/crates.io-index)", "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", @@ -170,7 +181,7 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.3.7" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -221,7 +232,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "walkdir" -version = "0.1.8" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -244,17 +255,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)" = "4a7ef30445607f6fc8720f0a0a2c7442284b629cf0d049286860fae23e71c4d9" "checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f" "checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344" -"checksum fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "bcd414e5a1a979b931bb92f41b7a54106d3f6d2e6c253e9ce943b7cd468251ef" +"checksum fs2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "640001e1bd865c7c32806292822445af576a6866175b5225aa2087ca5e3de551" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" "checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f" "checksum libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "044d1360593a78f5c8e5e710beccdc24ab71d1f01bc19a29bcacdba22e8475d8" 
"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054" "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" -"checksum memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f20f72ed93291a72e22e8b16bb18762183bb4943f0f483da5b8be1a9e8192752" +"checksum memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "065ce59af31c18ea2c419100bda6247dd4ec3099423202b12f0bd32e529fabd2" "checksum num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8890e6084723d57d0df8d2720b0d60c6ee67d6c93e7169630e4371e88765dcad" "checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5" -"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665" -"checksum regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "48f0573bcee95a48da786f8823465b5f2a1fae288a55407aca991e5b3e0eae11" +"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f" +"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957" "checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b" "checksum simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "63b5847c2d766ca7ce7227672850955802fabd779ba616aeabead4c2c3877023" "checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e" @@ -262,6 +273,6 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" "checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" "checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" "checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" -"checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780" +"checksum walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "98da26f00240118fbb7a06fa29579d1b39d34cd6e0505ea5c125b26d5260a967" "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" diff --git a/Cargo.toml b/Cargo.toml index e0480c54f..60db7c4ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,19 +27,17 @@ path = "tests/tests.rs" deque = "0.3" docopt = "0.6" env_logger = "0.3" -globset = { version = "0.1.0", path = "globset" } grep = { version = "0.1.3", path = "grep" } +ignore = { version = "0.1.0", path = "ignore" } lazy_static = "0.2" libc = "0.2" log = "0.3" memchr = "0.1" -memmap = "0.2" +memmap = "0.5" num_cpus = "1" regex = "0.1.77" rustc-serialize = "0.3" term = "0.4" -thread_local = "0.2.7" -walkdir = "0.1" [target.'cfg(windows)'.dependencies] kernel32-sys = "0.2" diff --git a/appveyor.yml b/appveyor.yml index 266812db1..645a525d1 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -30,6 +30,7 @@ test_script: - cargo test --verbose - cargo test --verbose --manifest-path grep/Cargo.toml - cargo test --verbose --manifest-path 
globset/Cargo.toml + - cargo test --verbose --manifest-path ignore/Cargo.toml before_deploy: # Generate artifacts for release @@ -59,7 +60,8 @@ deploy: branches: only: - - appveyor - - /\d+\.\d+\.\d+/ - except: - master + # - appveyor + # - /\d+\.\d+\.\d+/ + # except: + # - master diff --git a/ci/script.sh b/ci/script.sh index eca6c0f60..bf0731a2b 100644 --- a/ci/script.sh +++ b/ci/script.sh @@ -23,6 +23,8 @@ run_test_suite() { cargo test --target $TARGET --verbose --manifest-path grep/Cargo.toml cargo build --target $TARGET --verbose --manifest-path globset/Cargo.toml cargo test --target $TARGET --verbose --manifest-path globset/Cargo.toml + cargo build --target $TARGET --verbose --manifest-path ignore/Cargo.toml + cargo test --target $TARGET --verbose --manifest-path ignore/Cargo.toml # sanity check the file type file target/$TARGET/debug/rg diff --git a/globset/Cargo.toml b/globset/Cargo.toml index a885ea183..b302d9cdf 100644 --- a/globset/Cargo.toml +++ b/globset/Cargo.toml @@ -28,3 +28,6 @@ regex = "0.1.77" [dev-dependencies] glob = "0.2" + +[features] +simd-accel = ["regex/simd-accel"] diff --git a/globset/benches/bench.rs b/globset/benches/bench.rs index a151645d1..e142ed72e 100644 --- a/globset/benches/bench.rs +++ b/globset/benches/bench.rs @@ -11,6 +11,9 @@ extern crate lazy_static; extern crate regex; extern crate test; +use std::ffi::OsStr; +use std::path::Path; + use globset::{Candidate, Glob, GlobMatcher, GlobSet, GlobSetBuilder}; const EXT: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt"; diff --git a/globset/src/lib.rs b/globset/src/lib.rs index 056118a39..b9a36d3af 100644 --- a/globset/src/lib.rs +++ b/globset/src/lib.rs @@ -226,10 +226,21 @@ type Fnv = hash::BuildHasherDefault; /// single pass. #[derive(Clone, Debug)] pub struct GlobSet { + len: usize, strats: Vec, } impl GlobSet { + /// Returns true if this set is empty, and therefore matches nothing. 
+ pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Returns the number of globs in this set. + pub fn len(&self) -> usize { + self.len + } + /// Returns true if any glob in this set matches the path given. pub fn is_match>(&self, path: P) -> bool { self.is_match_candidate(&Candidate::new(path.as_ref())) @@ -240,6 +251,9 @@ impl GlobSet { /// This takes a Candidate as input, which can be used to amortize the /// cost of preparing a path for matching. pub fn is_match_candidate(&self, path: &Candidate) -> bool { + if self.is_empty() { + return false; + } for strat in &self.strats { if strat.is_match(path) { return true; @@ -250,9 +264,6 @@ impl GlobSet { /// Returns the sequence number of every glob pattern that matches the /// given path. - /// - /// This takes a Candidate as input, which can be used to amortize the - /// cost of preparing a path for matching. pub fn matches>(&self, path: P) -> Vec { self.matches_candidate(&Candidate::new(path.as_ref())) } @@ -264,6 +275,9 @@ impl GlobSet { /// cost of preparing a path for matching. pub fn matches_candidate(&self, path: &Candidate) -> Vec { let mut into = vec![]; + if self.is_empty() { + return into; + } self.matches_candidate_into(path, &mut into); into } @@ -274,12 +288,32 @@ impl GlobSet { /// `into` is is cleared before matching begins, and contains the set of /// sequence numbers (in ascending order) after matching ends. If no globs /// were matched, then `into` will be empty. + pub fn matches_into>( + &self, + path: P, + into: &mut Vec, + ) { + self.matches_candidate_into(&Candidate::new(path.as_ref()), into); + } + + /// Adds the sequence number of every glob pattern that matches the given + /// path to the vec given. + /// + /// `into` is cleared before matching begins, and contains the set of + /// sequence numbers (in ascending order) after matching ends. If no globs + /// were matched, then `into` will be empty. 
+ /// + /// This takes a Candidate as input, which can be used to amortize the + /// cost of preparing a path for matching. pub fn matches_candidate_into( &self, path: &Candidate, into: &mut Vec, ) { into.clear(); + if self.is_empty() { + return; + } for strat in &self.strats { strat.matches_into(path, into); } @@ -288,6 +322,9 @@ impl GlobSet { } fn new(pats: &[Glob]) -> Result { + if pats.is_empty() { + return Ok(GlobSet { len: 0, strats: vec![] }); + } let mut lits = LiteralStrategy::new(); let mut base_lits = BasenameLiteralStrategy::new(); let mut exts = ExtensionStrategy::new(); @@ -330,6 +367,7 @@ impl GlobSet { prefixes.literals.len(), suffixes.literals.len(), required_exts.0.len(), regexes.literals.len()); Ok(GlobSet { + len: pats.len(), strats: vec![ GlobSetMatchStrategy::Extension(exts), GlobSetMatchStrategy::BasenameLiteral(base_lits), @@ -750,4 +788,11 @@ mod tests { assert_eq!(0, matches[0]); assert_eq!(2, matches[1]); } + + #[test] + fn empty_set_works() { + let set = GlobSetBuilder::new().build().unwrap(); + assert!(!set.is_match("")); + assert!(!set.is_match("a")); + } } diff --git a/globset/src/pathutil.rs b/globset/src/pathutil.rs index 15a3283bd..16bd16fc7 100644 --- a/globset/src/pathutil.rs +++ b/globset/src/pathutil.rs @@ -89,16 +89,14 @@ pub fn path_bytes(path: &Path) -> Cow<[u8]> { os_str_bytes(path.as_os_str()) } -/// Return the raw bytes of the given OS string, transcoded to UTF-8 if -/// necessary. +/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8. #[cfg(unix)] pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> { use std::os::unix::ffi::OsStrExt; Cow::Borrowed(s.as_bytes()) } -/// Return the raw bytes of the given OS string, transcoded to UTF-8 if -/// necessary. +/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8. 
#[cfg(not(unix))] pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> { // TODO(burntsushi): On Windows, OS strings are WTF-8, which is a superset diff --git a/grep/Cargo.toml b/grep/Cargo.toml index d14ba8868..8637f16be 100644 --- a/grep/Cargo.toml +++ b/grep/Cargo.toml @@ -15,6 +15,6 @@ license = "Unlicense/MIT" [dependencies] log = "0.3" memchr = "0.1" -memmap = "0.2" +memmap = "0.5" regex = "0.1.77" regex-syntax = "0.3.5" diff --git a/ignore/Cargo.lock b/ignore/Cargo.lock new file mode 100644 index 000000000..7046ecddf --- /dev/null +++ b/ignore/Cargo.lock @@ -0,0 +1,170 @@ +[root] +name = "ignore" +version = "0.1.0" +dependencies = [ + "globset 0.1.0", + "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", + "tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "aho-corasick" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "fnv" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "globset" +version = "0.1.0" +dependencies = [ + "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.77 
(registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "lazy_static" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "libc" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "log" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "memchr" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "0.1.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", + "simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "simd" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = 
"tempdir" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread-id" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread_local" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "utf8-ranges" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "walkdir" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" +"checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344" +"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +"checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f" +"checksum 
libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d" +"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054" +"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" +"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5" +"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665" +"checksum regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "48f0573bcee95a48da786f8823465b5f2a1fae288a55407aca991e5b3e0eae11" +"checksum simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "63b5847c2d766ca7ce7227672850955802fabd779ba616aeabead4c2c3877023" +"checksum tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "87974a6f5c1dfb344d733055601650059a3363de2a6104819293baff662132d6" +"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" +"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" +"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" +"checksum walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "98da26f00240118fbb7a06fa29579d1b39d34cd6e0505ea5c125b26d5260a967" +"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" +"checksum winapi-build 0.1.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" diff --git a/ignore/Cargo.toml b/ignore/Cargo.toml new file mode 100644 index 000000000..520f9cf4f --- /dev/null +++ b/ignore/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "ignore" +version = "0.1.0" #:version +authors = ["Andrew Gallant "] +description = """ +A fast library for efficiently matching ignore files such as `.gitignore` +against file paths. +""" +documentation = "https://docs.rs/ignore" +homepage = "https://github.com/BurntSushi/ripgrep/tree/master/ignore" +repository = "https://github.com/BurntSushi/ripgrep/tree/master/ignore" +readme = "README.md" +keywords = ["glob", "ignore", "gitignore", "pattern", "file"] +license = "Unlicense/MIT" + +[lib] +name = "ignore" +bench = false + +[dependencies] +globset = { version = "0.1.0", path = "../globset" } +lazy_static = "0.2" +log = "0.3" +memchr = "0.1" +regex = "0.1.77" +thread_local = "0.2.7" +walkdir = "1" + +[dev-dependencies] +tempdir = "0.3.5" + +[features] +simd-accel = ["globset/simd-accel"] + +[profile.release] +debug = true diff --git a/ignore/README.md b/ignore/README.md new file mode 100644 index 000000000..2d2907c84 --- /dev/null +++ b/ignore/README.md @@ -0,0 +1,66 @@ +ignore +====== +The ignore crate provides a fast recursive directory iterator that respects +various filters such as globs, file types and `.gitignore` files. This crate +also provides lower level direct access to gitignore and file type matchers. + +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.png)](https://travis-ci.org/BurntSushi/ripgrep) +[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) +[![](https://img.shields.io/crates/v/ignore.svg)](https://crates.io/crates/ignore) + +Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). 
+ +### Documentation + +[https://docs.rs/ignore](https://docs.rs/ignore) + +### Usage + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +ignore = "0.1" +``` + +and this to your crate root: + +```rust +extern crate ignore; +``` + +### Example + +This example shows the most basic usage of this crate. This code will +recursively traverse the current directory while automatically filtering out +files and directories according to ignore globs found in files like +`.ignore` and `.gitignore`: + + +```rust,no_run +use ignore::Walk; + +for result in Walk::new("./") { + // Each item yielded by the iterator is either a directory entry or an + // error, so either print the path or the error. + match result { + Ok(entry) => println!("{}", entry.path().display()), + Err(err) => println!("ERROR: {}", err), + } +} +``` + +### Example: advanced + +By default, the recursive directory iterator will ignore hidden files and +directories. This can be disabled by building the iterator with `WalkBuilder`: + +```rust,no_run +use ignore::WalkBuilder; + +for result in WalkBuilder::new("./").hidden(false).build() { + println!("{:?}", result); +} +``` + +See the documentation for `WalkBuilder` for many other options. 
diff --git a/ignore/examples/walk.rs b/ignore/examples/walk.rs new file mode 100644 index 000000000..0ce0a086e --- /dev/null +++ b/ignore/examples/walk.rs @@ -0,0 +1,28 @@ +/* +extern crate ignore; +extern crate walkdir; + +use std::env; +use std::io::{self, Write}; +use std::os::unix::ffi::OsStrExt; + +use ignore::ignore::IgnoreBuilder; +use walkdir::WalkDir; + +fn main() { + let path = env::args().nth(1).unwrap(); + let ig = IgnoreBuilder::new().build(); + let wd = WalkDir::new(path); + let walker = ignore::walk::Iter::new(ig, wd); + + let mut stdout = io::BufWriter::new(io::stdout()); + // let mut count = 0; + for dirent in walker { + // count += 1; + stdout.write(dirent.path().as_os_str().as_bytes()).unwrap(); + stdout.write(b"\n").unwrap(); + } + // println!("{}", count); +} +*/ +fn main() {} diff --git a/ignore/src/dir.rs b/ignore/src/dir.rs new file mode 100644 index 000000000..6ac00627c --- /dev/null +++ b/ignore/src/dir.rs @@ -0,0 +1,803 @@ +// This module provides a data structure, `Ignore`, that connects "directory +// traversal" with "ignore matchers." Specifically, it knows about gitignore +// semantics and precedence, and is organized based on directory hierarchy. +// Namely, every matcher logically corresponds to ignore rules from a single +// directory, and points to the matcher for its corresponding parent directory. +// In this sense, `Ignore` is a *persistent* data structure. +// +// This design was specifically chosen to make it possible to use this data +// structure in a parallel directory iterator. +// +// My initial intention was to expose this module as part of this crate's +// public API, but I think the data structure's public API is too complicated +// with non-obvious failure modes. Alas, such things haven't been documented +// well. 
+ +use std::collections::HashMap; +use std::ffi::OsString; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, RwLock}; + +use gitignore::{self, Gitignore, GitignoreBuilder}; +use pathutil::{is_hidden, strip_prefix}; +use overrides::{self, Override}; +use types::{self, Types}; +use {Error, Match, PartialErrorBuilder}; + +/// IgnoreMatch represents information about where a match came from when using +/// the `Ignore` matcher. +#[derive(Clone, Debug)] +pub struct IgnoreMatch<'a>(IgnoreMatchInner<'a>); + +/// IgnoreMatchInner describes precisely where the match information came from. +/// This is private to allow expansion to more matchers in the future. +#[derive(Clone, Debug)] +enum IgnoreMatchInner<'a> { + Override(overrides::Glob<'a>), + Gitignore(&'a gitignore::Glob), + Types(types::Glob<'a>), + Hidden, +} + +impl<'a> IgnoreMatch<'a> { + fn overrides(x: overrides::Glob<'a>) -> IgnoreMatch<'a> { + IgnoreMatch(IgnoreMatchInner::Override(x)) + } + + fn gitignore(x: &'a gitignore::Glob) -> IgnoreMatch<'a> { + IgnoreMatch(IgnoreMatchInner::Gitignore(x)) + } + + fn types(x: types::Glob<'a>) -> IgnoreMatch<'a> { + IgnoreMatch(IgnoreMatchInner::Types(x)) + } + + fn hidden() -> IgnoreMatch<'static> { + IgnoreMatch(IgnoreMatchInner::Hidden) + } +} + +/// Options for the ignore matcher, shared between the matcher itself and the +/// builder. +#[derive(Clone, Copy, Debug)] +struct IgnoreOptions { + /// Whether to ignore hidden file paths or not. + hidden: bool, + /// Whether to read .ignore files. + ignore: bool, + /// Whether to read git's global gitignore file. + git_global: bool, + /// Whether to read .gitignore files. + git_ignore: bool, + /// Whether to read .git/info/exclude files. + git_exclude: bool, +} + +impl IgnoreOptions { + /// Returns true if at least one type of ignore rules should be matched. 
+ fn should_ignores(&self) -> bool { + self.ignore || self.git_global || self.git_ignore || self.git_exclude + } +} + +/// Ignore is a matcher useful for recursively walking one or more directories. +#[derive(Clone, Debug)] +pub struct Ignore(Arc); + +#[derive(Clone, Debug)] +struct IgnoreInner { + /// A map of all existing directories that have already been + /// compiled into matchers. + /// + /// Note that this is never used during matching, only when adding new + /// parent directory matchers. This avoids needing to rebuild glob sets for + /// parent directories if many paths are being searched. + compiled: Arc>>, + /// The path to the directory that this matcher was built from. + dir: PathBuf, + /// An override matcher (default is empty). + overrides: Arc, + /// A file type matcher. + types: Arc, + /// The parent directory to match next. + /// + /// If this is the root directory or there are otherwise no more + /// directories to match, then `parent` is `None`. + parent: Option, + /// Whether this is an absolute parent matcher, as added by add_parent. + is_absolute_parent: bool, + /// The absolute base path of this matcher. Populated only if parent + /// directories are added. + absolute_base: Option>, + /// Explicit ignore matchers specified by the caller. + explicit_ignores: Arc>, + /// The matcher for .ignore files. + ignore_matcher: Gitignore, + /// A global gitignore matcher, usually from $XDG_CONFIG_HOME/git/ignore. + git_global_matcher: Arc, + /// The matcher for .gitignore files. + git_ignore_matcher: Gitignore, + /// Special matcher for `.git/info/exclude` files. + git_exclude_matcher: Gitignore, + /// Whether this directory contains a .git sub-directory. + has_git: bool, + /// Ignore config. + opts: IgnoreOptions, +} + +impl Ignore { + /// Return the directory path of this matcher. + #[allow(dead_code)] + pub fn path(&self) -> &Path { + &self.0.dir + } + + /// Return true if this matcher has no parent. 
+ pub fn is_root(&self) -> bool { + self.0.parent.is_none() + } + + /// Return this matcher's parent, if one exists. + pub fn parent(&self) -> Option { + self.0.parent.clone() + } + + /// Create a new `Ignore` matcher with the parent directories of `dir`. + /// + /// Note that this can only be called on an `Ignore` matcher with no + /// parents (i.e., `is_root` returns `true`). This will panic otherwise. + pub fn add_parents>( + &self, + path: P, + ) -> (Ignore, Option) { + if !self.is_root() { + panic!("Ignore::add_parents called on non-root matcher"); + } + let absolute_base = match path.as_ref().canonicalize() { + Ok(path) => Arc::new(path), + Err(_) => { + // There's not much we can do here, so just return our + // existing matcher. We drop the error to be consistent + // with our general pattern of ignoring I/O errors when + // processing ignore files. + return (self.clone(), None); + } + }; + // List of parents, from child to root. + let mut parents = vec![]; + let mut path = &**absolute_base; + while let Some(parent) = path.parent() { + parents.push(parent); + path = parent; + } + let mut errs = PartialErrorBuilder::default(); + let mut ig = self.clone(); + for parent in parents.into_iter().rev() { + let mut compiled = self.0.compiled.write().unwrap(); + if let Some(prebuilt) = compiled.get(parent.as_os_str()) { + ig = prebuilt.clone(); + continue; + } + let (mut igtmp, err) = ig.add_child_path(parent); + errs.maybe_push(err); + igtmp.is_absolute_parent = true; + igtmp.absolute_base = Some(absolute_base.clone()); + ig = Ignore(Arc::new(igtmp)); + compiled.insert(parent.as_os_str().to_os_string(), ig.clone()); + } + (ig, errs.into_error_option()) + } + + /// Create a new `Ignore` matcher for the given child directory. + /// + /// Since building the matcher may require reading from multiple + /// files, it's possible that this method partially succeeds. 
Therefore, + /// a matcher is always returned (which may match nothing) and an error is + /// returned if it exists. + /// + /// Note that all I/O errors are completely ignored. + pub fn add_child>( + &self, + dir: P, + ) -> (Ignore, Option) { + let (ig, err) = self.add_child_path(dir.as_ref()); + (Ignore(Arc::new(ig)), err) + } + + /// Like add_child, but takes a full path and returns an IgnoreInner. + fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option) { + static IG_NAMES: &'static [&'static str] = &[".rgignore", ".ignore"]; + + let mut errs = PartialErrorBuilder::default(); + let ig_matcher = + if !self.0.opts.ignore { + Gitignore::empty() + } else { + let (m, err) = create_gitignore(&dir, IG_NAMES); + errs.maybe_push(err); + m + }; + let gi_matcher = + if !self.0.opts.git_ignore { + Gitignore::empty() + } else { + let (m, err) = create_gitignore(&dir, &[".gitignore"]); + errs.maybe_push(err); + m + }; + let gi_exclude_matcher = + if !self.0.opts.git_exclude { + Gitignore::empty() + } else { + let (m, err) = create_gitignore(&dir, &[".git/info/exclude"]); + errs.maybe_push(err); + m + }; + let ig = IgnoreInner { + compiled: self.0.compiled.clone(), + dir: dir.to_path_buf(), + overrides: self.0.overrides.clone(), + types: self.0.types.clone(), + parent: Some(self.clone()), + is_absolute_parent: false, + absolute_base: self.0.absolute_base.clone(), + explicit_ignores: self.0.explicit_ignores.clone(), + ignore_matcher: ig_matcher, + git_global_matcher: self.0.git_global_matcher.clone(), + git_ignore_matcher: gi_matcher, + git_exclude_matcher: gi_exclude_matcher, + has_git: dir.join(".git").is_dir(), + opts: self.0.opts, + }; + (ig, errs.into_error_option()) + } + + /// Returns a match indicating whether the given file path should be + /// ignored or not. + /// + /// The match contains information about its origin. + pub fn matched<'a, P: AsRef>( + &'a self, + path: P, + is_dir: bool, + ) -> Match> { + // We need to be careful with our path. 
If it has a leading ./, then + // strip it because it causes nothing but trouble. + let mut path = path.as_ref(); + if let Some(p) = strip_prefix("./", path) { + path = p; + } + // Match against the override patterns. If an override matches + // regardless of whether it's whitelist/ignore, then we quit and + // return that result immediately. Overrides have the highest + // precedence. + if !self.0.overrides.is_empty() { + let mat = + self.0.overrides.matched(path, is_dir) + .map(IgnoreMatch::overrides); + if !mat.is_none() { + return mat; + } + } + let mut whitelisted = Match::None; + if self.0.opts.should_ignores() { + let mat = self.matched_ignore(path, is_dir); + if mat.is_ignore() { + return mat; + } else if mat.is_whitelist() { + whitelisted = mat; + } + } + if !self.0.types.is_empty() { + let mat = + self.0.types.matched(path, is_dir).map(IgnoreMatch::types); + if mat.is_ignore() { + return mat; + } else if mat.is_whitelist() { + whitelisted = mat; + } + } + if whitelisted.is_none() && self.0.opts.hidden && is_hidden(path) { + return Match::Ignore(IgnoreMatch::hidden()); + } + whitelisted + } + + /// Performs matching only on the ignore files for this directory and + /// all parent directories. 
+ fn matched_ignore<'a>( + &'a self, + path: &Path, + is_dir: bool, + ) -> Match> { + let (mut m_ignore, mut m_gi, mut m_gi_exclude, mut m_explicit) = + (Match::None, Match::None, Match::None, Match::None); + let mut saw_git = false; + for ig in self.parents().take_while(|ig| !ig.0.is_absolute_parent) { + if m_ignore.is_none() { + m_ignore = + ig.0.ignore_matcher.matched(path, is_dir) + .map(IgnoreMatch::gitignore); + } + if !saw_git && m_gi.is_none() { + m_gi = + ig.0.git_ignore_matcher.matched(path, is_dir) + .map(IgnoreMatch::gitignore); + } + if !saw_git && m_gi_exclude.is_none() { + m_gi_exclude = + ig.0.git_exclude_matcher.matched(path, is_dir) + .map(IgnoreMatch::gitignore); + } + saw_git = saw_git || ig.0.has_git; + } + if let Some(abs_parent_path) = self.absolute_base() { + let path = abs_parent_path.join(path); + for ig in self.parents().skip_while(|ig|!ig.0.is_absolute_parent) { + if m_ignore.is_none() { + m_ignore = + ig.0.ignore_matcher.matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + if !saw_git && m_gi.is_none() { + m_gi = + ig.0.git_ignore_matcher.matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + if !saw_git && m_gi_exclude.is_none() { + m_gi_exclude = + ig.0.git_exclude_matcher.matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + saw_git = saw_git || ig.0.has_git; + } + } + for gi in self.0.explicit_ignores.iter().rev() { + if !m_explicit.is_none() { + break; + } + m_explicit = gi.matched(&path, is_dir).map(IgnoreMatch::gitignore); + } + let m_global = self.0.git_global_matcher.matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + if !m_ignore.is_none() { + m_ignore + } else if !m_gi.is_none() { + m_gi + } else if !m_gi_exclude.is_none() { + m_gi_exclude + } else if !m_global.is_none() { + m_global + } else if !m_explicit.is_none() { + m_explicit + } else { + Match::None + } + } + + /// Returns an iterator over parent ignore matchers, including this one. 
+ fn parents(&self) -> Parents { + Parents(Some(self)) + } + + /// Returns the absolute base path recorded for this matcher, if parent + /// directories have been added. + fn absolute_base(&self) -> Option<&Path> { + self.0.absolute_base.as_ref().map(|p| &***p) + } +} + +struct Parents<'a>(Option<&'a Ignore>); + +impl<'a> Iterator for Parents<'a> { + type Item = &'a Ignore; + + fn next(&mut self) -> Option<&'a Ignore> { + match self.0.take() { + None => None, + Some(ig) => { + self.0 = ig.0.parent.as_ref(); + Some(ig) + } + } + } +} + +/// A builder for creating an Ignore matcher. +#[derive(Clone, Debug)] +pub struct IgnoreBuilder { + /// The root directory path for this ignore matcher. + dir: PathBuf, + /// An override matcher (default is empty). + overrides: Arc, + /// A type matcher (default is empty). + types: Arc, + /// Explicit ignore matchers. + explicit_ignores: Vec, + /// Ignore config. + opts: IgnoreOptions, +} + +impl IgnoreBuilder { + /// Create a new builder for an `Ignore` matcher. + /// + /// All relative file paths are resolved with respect to the current + /// working directory. + pub fn new() -> IgnoreBuilder { + IgnoreBuilder { + dir: Path::new("").to_path_buf(), + overrides: Arc::new(Override::empty()), + types: Arc::new(Types::empty()), + explicit_ignores: vec![], + opts: IgnoreOptions { + hidden: true, + ignore: true, + git_global: true, + git_ignore: true, + git_exclude: true, + }, + } + } + + /// Builds a new `Ignore` matcher. + /// + /// The matcher returned won't match anything until ignore rules from + /// directories are added to it.
+ pub fn build(&self) -> Ignore { + let git_global_matcher = + if !self.opts.git_global { + Gitignore::empty() + } else { + let (gi, err) = Gitignore::global(); + if let Some(err) = err { + debug!("{}", err); + } + gi + }; + Ignore(Arc::new(IgnoreInner { + compiled: Arc::new(RwLock::new(HashMap::new())), + dir: self.dir.clone(), + overrides: self.overrides.clone(), + types: self.types.clone(), + parent: None, + is_absolute_parent: true, + absolute_base: None, + explicit_ignores: Arc::new(self.explicit_ignores.clone()), + ignore_matcher: Gitignore::empty(), + git_global_matcher: Arc::new(git_global_matcher), + git_ignore_matcher: Gitignore::empty(), + git_exclude_matcher: Gitignore::empty(), + has_git: false, + opts: self.opts, + })) + } + + /// Add an override matcher. + /// + /// By default, no override matcher is used. + /// + /// This overrides any previous setting. + pub fn overrides(&mut self, overrides: Override) -> &mut IgnoreBuilder { + self.overrides = Arc::new(overrides); + self + } + + /// Add a file type matcher. + /// + /// By default, no file type matcher is used. + /// + /// This overrides any previous setting. + pub fn types(&mut self, types: Types) -> &mut IgnoreBuilder { + self.types = Arc::new(types); + self + } + + /// Adds an explicit ignore matcher that was already built by the caller. + pub fn add_ignore(&mut self, ig: Gitignore) -> &mut IgnoreBuilder { + self.explicit_ignores.push(ig); + self + } + + /// Enables ignoring hidden files. + /// + /// This is enabled by default. + pub fn hidden(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.hidden = yes; + self + } + + /// Enables reading `.ignore` files. + /// + /// `.ignore` files have the same semantics as `gitignore` files and are + /// supported by search tools such as ripgrep and The Silver Searcher. + /// + /// This is enabled by default. + pub fn ignore(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.ignore = yes; + self + } + + /// Enables reading git's global gitignore file.
+ /// + /// Its precedence is lower than both normal `.gitignore` files and + /// `.git/info/exclude` files. + /// + /// This overwrites any previous global gitignore setting. + /// + /// This is enabled by default. + pub fn git_global(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.git_global = yes; + self + } + + /// Enables reading `.gitignore` files. + /// + /// `.gitignore` files have match semantics as described in the `gitignore` + /// man page. + /// + /// This is enabled by default. + pub fn git_ignore(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.git_ignore = yes; + self + } + + /// Enables reading `.git/info/exclude` files. + /// + /// `.git/info/exclude` files have match semantics as described in the + /// `gitignore` man page. + /// + /// This is enabled by default. + pub fn git_exclude(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.git_exclude = yes; + self + } +} + +/// Creates a new gitignore matcher for the directory given. +/// +/// Ignore globs are extracted from each of the file names in `dir` in the +/// order given (earlier names have lower precedence than later names). +/// +/// I/O errors are ignored. 
+pub fn create_gitignore( + dir: &Path, + names: &[&str], +) -> (Gitignore, Option) { + let mut builder = GitignoreBuilder::new(dir); + let mut errs = PartialErrorBuilder::default(); + for name in names { + let gipath = dir.join(name); + errs.maybe_push_ignore_io(builder.add(gipath)); + } + let gi = match builder.build() { + Ok(gi) => gi, + Err(err) => { + errs.push(err); + GitignoreBuilder::new(dir).build().unwrap() + } + }; + (gi, errs.into_error_option()) +} + +#[cfg(test)] +mod tests { + use std::fs::{self, File}; + use std::io::Write; + use std::path::Path; + + use tempdir::TempDir; + + use dir::IgnoreBuilder; + use gitignore::Gitignore; + use Error; + + fn wfile>(path: P, contents: &str) { + let mut file = File::create(path).unwrap(); + file.write_all(contents.as_bytes()).unwrap(); + } + + fn mkdirp>(path: P) { + fs::create_dir_all(path).unwrap(); + } + + fn partial(err: Error) -> Vec { + match err { + Error::Partial(errs) => errs, + _ => panic!("expected partial error but got {:?}", err), + } + } + + #[test] + fn explicit_ignore() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join("not-an-ignore"), "foo\n!bar"); + + let (gi, err) = Gitignore::new(td.path().join("not-an-ignore")); + assert!(err.is_none()); + let (ig, err) = IgnoreBuilder::new() + .add_ignore(gi).build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn git_exclude() { + let td = TempDir::new("ignore-test-").unwrap(); + mkdirp(td.path().join(".git/info")); + wfile(td.path().join(".git/info/exclude"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn gitignore() { + let td = 
TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn ignore() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".ignore"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + // Tests that an .ignore will override a .gitignore. + #[test] + fn ignore_over_gitignore() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join(".ignore"), "!foo"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_whitelist()); + } + + // Tests that exclude has lower precedence than both .ignore and .gitignore.
+ #[test] + fn exclude_lowest() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "!foo"); + wfile(td.path().join(".ignore"), "!bar"); + mkdirp(td.path().join(".git/info")); + wfile(td.path().join(".git/info/exclude"), "foo\nbar\nbaz"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("baz", false).is_ignore()); + assert!(ig.matched("foo", false).is_whitelist()); + assert!(ig.matched("bar", false).is_whitelist()); + } + + #[test] + fn errored() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "f**oo"); + + let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_some()); + } + + #[test] + fn errored_both() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "f**oo"); + wfile(td.path().join(".ignore"), "fo**o"); + + let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert_eq!(2, partial(err.expect("an error")).len()); + } + + #[test] + fn errored_partial() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "f**oo\nbar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_some()); + assert!(ig.matched("bar", false).is_ignore()); + } + + #[test] + fn errored_partial_and_ignore() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "f**oo\nbar"); + wfile(td.path().join(".ignore"), "!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_some()); + assert!(ig.matched("bar", false).is_whitelist()); + } + + #[test] + fn not_present_empty() { + let td = TempDir::new("ignore-test-").unwrap(); + + let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + } + + #[test] + fn stops_at_git_dir() { + // This tests that .gitignore files beyond a .git barrier aren't + 
// matched, but .ignore files are. + let td = TempDir::new("ignore-test-").unwrap(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("foo/.git")); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join(".ignore"), "bar"); + + let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_child(td.path()); + assert!(err.is_none()); + let (ig2, err) = ig1.add_child(ig1.path().join("foo")); + assert!(err.is_none()); + + assert!(ig1.matched("foo", false).is_ignore()); + assert!(ig2.matched("foo", false).is_none()); + + assert!(ig1.matched("bar", false).is_ignore()); + assert!(ig2.matched("bar", false).is_ignore()); + } + + #[test] + fn absolute_parent() { + let td = TempDir::new("ignore-test-").unwrap(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("foo")); + wfile(td.path().join(".gitignore"), "bar"); + + // First, check that the parent gitignore file isn't detected if the + // parent isn't added. This establishes a baseline. + let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_child(td.path().join("foo")); + assert!(err.is_none()); + assert!(ig1.matched("bar", false).is_none()); + + // Second, check that adding a parent directory actually works. 
+ let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_parents(td.path().join("foo")); + assert!(err.is_none()); + let (ig2, err) = ig1.add_child(td.path().join("foo")); + assert!(err.is_none()); + assert!(ig2.matched("bar", false).is_ignore()); + } + + #[test] + fn absolute_parent_anchored() { + let td = TempDir::new("ignore-test-").unwrap(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("src/llvm")); + wfile(td.path().join(".gitignore"), "/llvm/\nfoo"); + + let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_parents(td.path().join("src")); + assert!(err.is_none()); + let (ig2, err) = ig1.add_child("src"); + assert!(err.is_none()); + + assert!(ig1.matched("llvm", true).is_none()); + assert!(ig2.matched("llvm", true).is_none()); + assert!(ig2.matched("src/llvm", true).is_none()); + assert!(ig2.matched("foo", false).is_ignore()); + assert!(ig2.matched("src/foo", false).is_ignore()); + } +} diff --git a/ignore/src/gitignore.rs b/ignore/src/gitignore.rs new file mode 100644 index 000000000..c44910ff8 --- /dev/null +++ b/ignore/src/gitignore.rs @@ -0,0 +1,607 @@ +/*! +The gitignore module provides a way to match globs from a gitignore file +against file paths. + +Note that this module implements the specification as described in the +`gitignore` man page from scratch. That is, this module does *not* shell out to +the `git` command line tool. +*/ + +use std::cell::RefCell; +use std::env; +use std::fs::File; +use std::io::{self, BufRead, Read}; +use std::path::{Path, PathBuf}; +use std::str; +use std::sync::Arc; + +use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder}; +use regex::bytes::Regex; +use thread_local::ThreadLocal; + +use pathutil::{is_file_name, strip_prefix}; +use {Error, Match, PartialErrorBuilder}; + +/// Glob represents a single glob in a gitignore file. +/// +/// This is used to report information about the highest precedence glob that +/// matched in one or more gitignore files.
+#[derive(Clone, Debug)] +pub struct Glob { + /// The file path that this glob was extracted from. + from: Option, + /// The original glob string. + original: String, + /// The actual glob string used to convert to a regex. + actual: String, + /// Whether this is a whitelisted glob or not. + is_whitelist: bool, + /// Whether this glob should only match directories or not. + is_only_dir: bool, +} + +impl Glob { + /// Returns the file path that defined this glob. + pub fn from(&self) -> Option<&Path> { + self.from.as_ref().map(|p| &**p) + } + + /// The original glob as it was defined in a gitignore file. + pub fn original(&self) -> &str { + &self.original + } + + /// The actual glob that was compiled to respect gitignore + /// semantics. + pub fn actual(&self) -> &str { + &self.actual + } + + /// Whether this was a whitelisted glob or not. + pub fn is_whitelist(&self) -> bool { + self.is_whitelist + } + + /// Whether this glob must match a directory or not. + pub fn is_only_dir(&self) -> bool { + self.is_only_dir + } +} + +/// Gitignore is a matcher for the globs in one or more gitignore files +/// in the same directory. +#[derive(Clone, Debug)] +pub struct Gitignore { + set: GlobSet, + root: PathBuf, + globs: Vec, + num_ignores: u64, + num_whitelists: u64, + matches: Arc>>>, +} + +impl Gitignore { + /// Creates a new gitignore matcher from the gitignore file path given. + /// + /// If it's desirable to include multiple gitignore files in a single + /// matcher, or read gitignore globs from a different source, then + /// use `GitignoreBuilder`. + /// + /// This always returns a valid matcher, even if it's empty. In particular, + /// a Gitignore file can be partially valid, e.g., when one glob is invalid + /// but the rest aren't. + /// + /// Note that I/O errors are ignored. For more granular control over + /// errors, use `GitignoreBuilder`. 
+ pub fn new>( + gitignore_path: P, + ) -> (Gitignore, Option) { + let path = gitignore_path.as_ref(); + let parent = path.parent().unwrap_or(Path::new("/")); + let mut builder = GitignoreBuilder::new(parent); + let mut errs = PartialErrorBuilder::default(); + errs.maybe_push_ignore_io(builder.add(path)); + match builder.build() { + Ok(gi) => (gi, errs.into_error_option()), + Err(err) => { + errs.push(err); + (Gitignore::empty(), errs.into_error_option()) + } + } + } + + /// Creates a new gitignore matcher from the global ignore file, if one + /// exists. + /// + /// The global config file path is specified by git's `core.excludesFile` + /// config option. + /// + /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig` + /// does not exist or does not specify `core.excludesFile`, then + /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not + /// set or is empty, then `$HOME/.config/git/ignore` is used instead. + pub fn global() -> (Gitignore, Option) { + match gitconfig_excludes_path() { + None => (Gitignore::empty(), None), + Some(path) => { + if !path.is_file() { + (Gitignore::empty(), None) + } else { + Gitignore::new(path) + } + } + } + } + + /// Creates a new empty gitignore matcher that never matches anything. + /// + /// Its path is empty. + pub fn empty() -> Gitignore { + GitignoreBuilder::new("").build().unwrap() + } + + /// Returns the directory containing this gitignore matcher. + /// + /// All matches are done relative to this path. + pub fn path(&self) -> &Path { + &*self.root + } + + /// Returns true if and only if this gitignore has zero globs, and + /// therefore never matches any file path. + pub fn is_empty(&self) -> bool { + self.set.is_empty() + } + + /// Returns the total number of globs, which should be equivalent to + /// `num_ignores + num_whitelists`. + pub fn len(&self) -> usize { + self.set.len() + } + + /// Returns the total number of ignore globs. 
+ pub fn num_ignores(&self) -> u64 { + self.num_ignores + } + + /// Returns the total number of whitelisted globs. + pub fn num_whitelists(&self) -> u64 { + self.num_whitelists + } + + /// Returns whether the given file path matched a pattern in this gitignore + /// matcher. + /// + /// `is_dir` should be true if the path refers to a directory and false + /// otherwise. + /// + /// The given path is matched relative to the path given when building + /// the matcher. Specifically, before matching `path`, its prefix (as + /// determined by a common suffix of the directory containing this + /// gitignore) is stripped. If there is no common suffix/prefix overlap, + /// then `path` is assumed to be relative to this matcher. + pub fn matched>( + &self, + path: P, + is_dir: bool, + ) -> Match<&Glob> { + if self.is_empty() { + return Match::None; + } + self.matched_stripped(self.strip(path.as_ref()), is_dir) + } + + /// Like matched, but takes a path that has already been stripped. + fn matched_stripped>( + &self, + path: P, + is_dir: bool, + ) -> Match<&Glob> { + if self.is_empty() { + return Match::None; + } + let path = path.as_ref(); + let _matches = self.matches.get_default(); + let mut matches = _matches.borrow_mut(); + let candidate = Candidate::new(path); + self.set.matches_candidate_into(&candidate, &mut *matches); + for &i in matches.iter().rev() { + let glob = &self.globs[i]; + if !glob.is_only_dir() || is_dir { + return if glob.is_whitelist() { + Match::Whitelist(glob) + } else { + Match::Ignore(glob) + }; + } + } + Match::None + } + + /// Strips the given path such that it's suitable for matching with this + /// gitignore matcher. + fn strip<'a, P: 'a + AsRef + ?Sized>( + &'a self, + path: &'a P, + ) -> &'a Path { + let mut path = path.as_ref(); + // A leading ./ is completely superfluous. We also strip it from + // our gitignore root path, so we need to strip it from our candidate + // path too. 
+ if let Some(p) = strip_prefix("./", path) { + path = p; + } + // Strip any common prefix between the candidate path and the root + // of the gitignore, to make sure we get relative matching right. + // BUT, a file name might not have any directory components to it, + // in which case, we don't want to accidentally strip any part of the + // file name. + if !is_file_name(path) { + if let Some(p) = strip_prefix(&self.root, path) { + path = p; + // If we're left with a leading slash, get rid of it. + if let Some(p) = strip_prefix("/", path) { + path = p; + } + } + } + path + } +} + +/// Builds a matcher for a single set of globs from a .gitignore file. +pub struct GitignoreBuilder { + builder: GlobSetBuilder, + root: PathBuf, + globs: Vec, +} + +impl GitignoreBuilder { + /// Create a new builder for a gitignore file. + /// + /// The path given should be the path at which the globs for this gitignore + /// file should be matched. Note that paths are always matched relative + /// to the root path given here. Generally, the root path should correspond + /// to the *directory* containing a `.gitignore` file. + pub fn new>(root: P) -> GitignoreBuilder { + let root = root.as_ref(); + GitignoreBuilder { + builder: GlobSetBuilder::new(), + root: strip_prefix("./", root).unwrap_or(root).to_path_buf(), + globs: vec![], + } + } + + /// Builds a new matcher from the globs added so far. + /// + /// Once a matcher is built, no new globs can be added to it. + pub fn build(&self) -> Result { + let nignore = self.globs.iter().filter(|g| !g.is_whitelist()).count(); + let nwhite = self.globs.iter().filter(|g| g.is_whitelist()).count(); + let set = try!( + self.builder.build().map_err(|err| Error::Glob(err.to_string()))); + Ok(Gitignore { + set: set, + root: self.root.clone(), + globs: self.globs.clone(), + num_ignores: nignore as u64, + num_whitelists: nwhite as u64, + matches: Arc::new(ThreadLocal::default()), + }) + } + + /// Add each glob from the file path given. 
+ /// + /// The file given should be formatted as a `gitignore` file. + /// + /// Note that partial errors can be returned. For example, if there was + /// a problem adding one glob, an error for that will be returned, but + /// all other valid globs will still be added. + pub fn add>(&mut self, path: P) -> Option { + let path = path.as_ref(); + let file = match File::open(path) { + Err(err) => return Some(Error::Io(err).with_path(path)), + Ok(file) => file, + }; + let rdr = io::BufReader::new(file); + let mut errs = PartialErrorBuilder::default(); + for (i, line) in rdr.lines().enumerate() { + let lineno = (i + 1) as u64; + let line = match line { + Ok(line) => line, + Err(err) => { + errs.push(Error::Io(err).tagged(path, lineno)); + continue; + } + }; + if let Err(err) = self.add_line(Some(path.to_path_buf()), &line) { + errs.push(err.tagged(path, lineno)); + } + } + errs.into_error_option() + } + + /// Add each glob line from the string given. + /// + /// If this string came from a particular `gitignore` file, then its path + /// should be provided here. + /// + /// The string given should be formatted as a `gitignore` file. + #[cfg(test)] + fn add_str( + &mut self, + from: Option, + gitignore: &str, + ) -> Result<&mut GitignoreBuilder, Error> { + for line in gitignore.lines() { + try!(self.add_line(from.clone(), line)); + } + Ok(self) + } + + /// Add a line from a gitignore file to this builder. + /// + /// If this line came from a particular `gitignore` file, then its path + /// should be provided here. + /// + /// If the line could not be parsed as a glob, then an error is returned. 
+ pub fn add_line( + &mut self, + from: Option, + mut line: &str, + ) -> Result<&mut GitignoreBuilder, Error> { + if line.starts_with("#") { + return Ok(self); + } + if !line.ends_with("\\ ") { + line = line.trim_right(); + } + if line.is_empty() { + return Ok(self); + } + let mut glob = Glob { + from: from, + original: line.to_string(), + actual: String::new(), + is_whitelist: false, + is_only_dir: false, + }; + let mut literal_separator = false; + let has_slash = line.chars().any(|c| c == '/'); + let is_absolute = line.chars().nth(0).unwrap() == '/'; + if line.starts_with("\\!") || line.starts_with("\\#") { + line = &line[1..]; + } else { + if line.starts_with("!") { + glob.is_whitelist = true; + line = &line[1..]; + } + if line.starts_with("/") { + // `man gitignore` says that if a glob starts with a slash, + // then the glob can only match the beginning of a path + // (relative to the location of gitignore). We achieve this by + // simply banning wildcards from matching /. + literal_separator = true; + line = &line[1..]; + } + } + // If it ends with a slash, then this should only match directories, + // but the slash should otherwise not be used while globbing. + if let Some((i, c)) = line.char_indices().rev().nth(0) { + if c == '/' { + glob.is_only_dir = true; + line = &line[..i]; + } + } + // If there is a literal slash, then we note that so that globbing + // doesn't let wildcards match slashes. + glob.actual = line.to_string(); + if has_slash { + literal_separator = true; + } + // If there was a leading slash, then this is a glob that must + // match the entire path name. Otherwise, we should let it match + // anywhere, so use a **/ prefix. + if !is_absolute { + // ... but only if we don't already have a **/ prefix. + if !glob.actual.starts_with("**/") { + glob.actual = format!("**/{}", glob.actual); + } + } + // If the glob ends with `/**`, then we should only match everything + // inside a directory, but not the directory itself. 
Standard globs + // will match the directory. So we add `/*` to force the issue. + if glob.actual.ends_with("/**") { + glob.actual = format!("{}/*", glob.actual); + } + let parsed = try!( + GlobBuilder::new(&glob.actual) + .literal_separator(literal_separator) + .build() + .map_err(|err| Error::Glob(err.to_string()))); + self.builder.add(parsed); + self.globs.push(glob); + Ok(self) + } +} + +/// Return the file path of the current environment's global gitignore file. +/// +/// Note that the file path returned may not exist. +fn gitconfig_excludes_path() -> Option { + gitconfig_contents() + .and_then(|data| parse_excludes_file(&data)) + .or_else(excludes_file_default) +} + +/// Returns the file contents of git's global config file, if one exists. +fn gitconfig_contents() -> Option> { + let home = match env::var_os("HOME") { + None => return None, + Some(home) => PathBuf::from(home), + }; + let mut file = match File::open(home.join(".gitconfig")) { + Err(_) => return None, + Ok(file) => io::BufReader::new(file), + }; + let mut contents = vec![]; + file.read_to_end(&mut contents).ok().map(|_| contents) +} + +/// Returns the default file path for a global .gitignore file. +/// +/// Specifically, this respects XDG_CONFIG_HOME. +fn excludes_file_default() -> Option { + env::var_os("XDG_CONFIG_HOME") + .and_then(|x| if x.is_empty() { None } else { Some(x) }) + .or_else(|| env::var_os("HOME")) + .map(|x| PathBuf::from(x).join("git/ignore")) +} + +/// Extract git's `core.excludesfile` config setting from the raw file contents +/// given. +fn parse_excludes_file(data: &[u8]) -> Option { + // N.B. This is the lazy approach, and isn't technically correct, but + // probably works in more circumstances. I guess we would ideally have + // a full INI parser. Yuck. + lazy_static! 
{ + static ref RE: Regex = Regex::new( + r"(?ium)^\s*excludesfile\s*=\s*(.+)\s*$").unwrap(); + }; + let caps = match RE.captures(data) { + None => return None, + Some(caps) => caps, + }; + str::from_utf8(&caps[1]).ok().map(|s| PathBuf::from(expand_tilde(s))) +} + +/// Expands every ~ in the given file path (not just a leading one) to $HOME. +fn expand_tilde(path: &str) -> String { + let home = match env::var("HOME") { + Err(_) => return path.to_string(), + Ok(home) => home, + }; + path.replace("~", &home) +} + +#[cfg(test)] +mod tests { + use std::path::Path; + use super::{Gitignore, GitignoreBuilder}; + + fn gi_from_str>(root: P, s: &str) -> Gitignore { + let mut builder = GitignoreBuilder::new(root); + builder.add_str(None, s).unwrap(); + builder.build().unwrap() + } + + macro_rules! ignored { + ($name:ident, $root:expr, $gi:expr, $path:expr) => { + ignored!($name, $root, $gi, $path, false); + }; + ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => { + #[test] + fn $name() { + let gi = gi_from_str($root, $gi); + assert!(gi.matched($path, $is_dir).is_ignore()); + } + }; + } + + macro_rules! 
not_ignored { + ($name:ident, $root:expr, $gi:expr, $path:expr) => { + not_ignored!($name, $root, $gi, $path, false); + }; + ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => { + #[test] + fn $name() { + let gi = gi_from_str($root, $gi); + assert!(!gi.matched($path, $is_dir).is_ignore()); + } + }; + } + + const ROOT: &'static str = "/home/foobar/rust/rg"; + + ignored!(ig1, ROOT, "months", "months"); + ignored!(ig2, ROOT, "*.lock", "Cargo.lock"); + ignored!(ig3, ROOT, "*.rs", "src/main.rs"); + ignored!(ig4, ROOT, "src/*.rs", "src/main.rs"); + ignored!(ig5, ROOT, "/*.c", "cat-file.c"); + ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs"); + ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs"); + ignored!(ig8, ROOT, "foo/", "foo", true); + ignored!(ig9, ROOT, "**/foo", "foo"); + ignored!(ig10, ROOT, "**/foo", "src/foo"); + ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar"); + ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz"); + ignored!(ig13, ROOT, "**/foo/bar", "foo/bar"); + ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar"); + ignored!(ig15, ROOT, "abc/**", "abc/x"); + ignored!(ig16, ROOT, "abc/**", "abc/x/y"); + ignored!(ig17, ROOT, "abc/**", "abc/x/y/z"); + ignored!(ig18, ROOT, "a/**/b", "a/b"); + ignored!(ig19, ROOT, "a/**/b", "a/x/b"); + ignored!(ig20, ROOT, "a/**/b", "a/x/y/b"); + ignored!(ig21, ROOT, r"\!xy", "!xy"); + ignored!(ig22, ROOT, r"\#foo", "#foo"); + ignored!(ig23, ROOT, "foo", "./foo"); + ignored!(ig24, ROOT, "target", "grep/target"); + ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock"); + ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz"); + ignored!(ig27, ROOT, "foo/", "xyz/foo", true); + ignored!(ig28, ROOT, "src/*.rs", "src/grep/src/main.rs"); + ignored!(ig29, "./src", "/llvm/", "./src/llvm", true); + ignored!(ig30, ROOT, "node_modules/ ", "node_modules", true); + + not_ignored!(ignot1, ROOT, "amonths", "months"); + not_ignored!(ignot2, ROOT, "monthsa", "months"); + not_ignored!(ignot3, ROOT, "/src/*.rs", 
"src/grep/src/main.rs"); + not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c"); + not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs"); + not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs"); + not_ignored!(ignot7, ROOT, "foo/", "foo", false); + not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz"); + not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz"); + not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar"); + not_ignored!(ignot11, ROOT, "#foo", "#foo"); + not_ignored!(ignot12, ROOT, "\n\n\n", "foo"); + not_ignored!(ignot13, ROOT, "foo/**", "foo", true); + not_ignored!( + ignot14, "./third_party/protobuf", "m4/ltoptions.m4", + "./third_party/protobuf/csharp/src/packages/repositories.config"); + + fn bytes(s: &str) -> Vec { + s.to_string().into_bytes() + } + + fn path_string>(path: P) -> String { + path.as_ref().to_str().unwrap().to_string() + } + + #[test] + fn parse_excludes_file1() { + let data = bytes("[core]\nexcludesFile = /foo/bar"); + let got = super::parse_excludes_file(&data).unwrap(); + assert_eq!(path_string(got), "/foo/bar"); + } + + #[test] + fn parse_excludes_file2() { + let data = bytes("[core]\nexcludesFile = ~/foo/bar"); + let got = super::parse_excludes_file(&data).unwrap(); + assert_eq!(path_string(got), super::expand_tilde("~/foo/bar")); + } + + #[test] + fn parse_excludes_file3() { + let data = bytes("[core]\nexcludeFile = /foo/bar"); + assert!(super::parse_excludes_file(&data).is_none()); + } + + // See: https://github.com/BurntSushi/ripgrep/issues/106 + #[test] + fn regression_106() { + gi_from_str("/", " "); + } +} diff --git a/ignore/src/lib.rs b/ignore/src/lib.rs new file mode 100644 index 000000000..a3aa0c8fb --- /dev/null +++ b/ignore/src/lib.rs @@ -0,0 +1,300 @@ +/*! +The ignore crate provides a fast recursive directory iterator that respects +various filters such as globs, file types and `.gitignore` files. 
The precise +matching rules and precedence are explained in the documentation for +`WalkBuilder`. + +Secondarily, this crate exposes gitignore and file type matchers for use cases +that demand more fine-grained control. + +# Example + +This example shows the most basic usage of this crate. This code will +recursively traverse the current directory while automatically filtering out +files and directories according to ignore globs found in files like +`.ignore` and `.gitignore`: + + +```rust,no_run +use ignore::Walk; + +for result in Walk::new("./") { + // Each item yielded by the iterator is either a directory entry or an + // error, so either print the path or the error. + match result { + Ok(entry) => println!("{}", entry.path().display()), + Err(err) => println!("ERROR: {}", err), + } +} +``` + +# Example: advanced + +By default, the recursive directory iterator will ignore hidden files and +directories. This can be disabled by building the iterator with `WalkBuilder`: + +```rust,no_run +use ignore::WalkBuilder; + +for result in WalkBuilder::new("./").hidden(false).build() { + println!("{:?}", result); +} +``` + +See the documentation for `WalkBuilder` for many other options. +*/ + +extern crate globset; +#[macro_use] +extern crate lazy_static; +#[macro_use] +extern crate log; +extern crate memchr; +extern crate regex; +#[cfg(test)] +extern crate tempdir; +extern crate thread_local; +extern crate walkdir; + +use std::error; +use std::fmt; +use std::io; +use std::path::{Path, PathBuf}; + +pub use walk::{DirEntry, Walk, WalkBuilder}; + +mod dir; +pub mod gitignore; +mod pathutil; +pub mod overrides; +pub mod types; +mod walk; + +/// Represents an error that can occur when parsing a gitignore file. +#[derive(Debug)] +pub enum Error { + /// A collection of "soft" errors. These occur when adding an ignore + /// file partially succeeded. + Partial(Vec), + /// An error associated with a specific line number. 
+ WithLineNumber { line: u64, err: Box }, + /// An error associated with a particular file path. + WithPath { path: PathBuf, err: Box }, + /// An error that occurs when doing I/O, such as reading an ignore file. + Io(io::Error), + /// An error that occurs when trying to parse a glob. + Glob(String), + /// A type selection for a file type that is not defined. + UnrecognizedFileType(String), + /// A user specified file type definition could not be parsed. + InvalidDefinition, +} + +impl Error { + /// Returns true if this is a partial error. + /// + /// A partial error occurs when only some operations failed while others + /// may have succeeded. For example, an ignore file may contain an invalid + /// glob among otherwise valid globs. + pub fn is_partial(&self) -> bool { + match *self { + Error::Partial(_) => true, + Error::WithLineNumber { ref err, .. } => err.is_partial(), + Error::WithPath { ref err, .. } => err.is_partial(), + _ => false, + } + } + + /// Returns true if this error is exclusively an I/O error. + pub fn is_io(&self) -> bool { + match *self { + Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(), + Error::WithLineNumber { ref err, .. } => err.is_io(), + Error::WithPath { ref err, .. } => err.is_io(), + Error::Io(_) => true, + Error::Glob(_) => false, + Error::UnrecognizedFileType(_) => false, + Error::InvalidDefinition => false, + } + } + + /// Turn an error into a tagged error with the given file path. + fn with_path>(self, path: P) -> Error { + Error::WithPath { + path: path.as_ref().to_path_buf(), + err: Box::new(self), + } + } + + /// Turn an error into a tagged error with the given file path and line + /// number. If path is empty, then it is omitted from the error. 
+ fn tagged>(self, path: P, lineno: u64) -> Error { + let errline = Error::WithLineNumber { + line: lineno, + err: Box::new(self), + }; + if path.as_ref().as_os_str().is_empty() { + return errline; + } + errline.with_path(path) + } +} + +impl error::Error for Error { + fn description(&self) -> &str { + match *self { + Error::Partial(_) => "partial error", + Error::WithLineNumber { ref err, .. } => err.description(), + Error::WithPath { ref err, .. } => err.description(), + Error::Io(ref err) => err.description(), + Error::Glob(ref msg) => msg, + Error::UnrecognizedFileType(_) => "unrecognized file type", + Error::InvalidDefinition => "invalid definition", + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Error::Partial(ref errs) => { + let msgs: Vec = + errs.iter().map(|err| err.to_string()).collect(); + write!(f, "{}", msgs.join("\n")) + } + Error::WithLineNumber { line, ref err } => { + write!(f, "line {}: {}", line, err) + } + Error::WithPath { ref path, ref err } => { + write!(f, "{}: {}", path.display(), err) + } + Error::Io(ref err) => err.fmt(f), + Error::Glob(ref msg) => write!(f, "{}", msg), + Error::UnrecognizedFileType(ref ty) => { + write!(f, "unrecognized file type: {}", ty) + } + Error::InvalidDefinition => { + write!(f, "invalid definition (format is type:glob, e.g., \ + html:*.html)") + } + } + } +} + +impl From for Error { + fn from(err: io::Error) -> Error { + Error::Io(err) + } +} + +#[derive(Debug, Default)] +struct PartialErrorBuilder(Vec); + +impl PartialErrorBuilder { + fn push(&mut self, err: Error) { + self.0.push(err); + } + + fn push_ignore_io(&mut self, err: Error) { + if !err.is_io() { + self.push(err); + } + } + + fn maybe_push(&mut self, err: Option) { + if let Some(err) = err { + self.push(err); + } + } + + fn maybe_push_ignore_io(&mut self, err: Option) { + if let Some(err) = err { + self.push_ignore_io(err); + } + } + + fn into_error_option(mut self) -> Option { + 
if self.0.is_empty() { + None + } else if self.0.len() == 1 { + Some(self.0.pop().unwrap()) + } else { + Some(Error::Partial(self.0)) + } + } +} + +/// The result of a glob match. +/// +/// The type parameter `T` typically refers to a type that provides more +/// information about a particular match. For example, it might identify +/// the specific gitignore file and the specific glob pattern that caused +/// the match. +#[derive(Clone, Debug)] +pub enum Match { + /// The path didn't match any glob. + None, + /// The highest precedent glob matched indicates the path should be + /// ignored. + Ignore(T), + /// The highest precedent glob matched indicates the path should be + /// whitelisted. + Whitelist(T), +} + +impl Match { + /// Returns true if the match result didn't match any globs. + pub fn is_none(&self) -> bool { + match *self { + Match::None => true, + Match::Ignore(_) | Match::Whitelist(_) => false, + } + } + + /// Returns true if the match result implies the path should be ignored. + pub fn is_ignore(&self) -> bool { + match *self { + Match::Ignore(_) => true, + Match::None | Match::Whitelist(_) => false, + } + } + + /// Returns true if the match result implies the path should be + /// whitelisted. + pub fn is_whitelist(&self) -> bool { + match *self { + Match::Whitelist(_) => true, + Match::None | Match::Ignore(_) => false, + } + } + + /// Inverts the match so that `Ignore` becomes `Whitelist` and + /// `Whitelist` becomes `Ignore`. A non-match remains the same. + pub fn invert(self) -> Match { + match self { + Match::None => Match::None, + Match::Ignore(t) => Match::Whitelist(t), + Match::Whitelist(t) => Match::Ignore(t), + } + } + + /// Return the value inside this match if it exists. + pub fn inner(&self) -> Option<&T> { + match *self { + Match::None => None, + Match::Ignore(ref t) => Some(t), + Match::Whitelist(ref t) => Some(t), + } + } + + /// Apply the given function to the value inside this match. 
+ /// + /// If the match has no value, then return the match unchanged. + pub fn map U>(self, f: F) -> Match { + match self { + Match::None => Match::None, + Match::Ignore(t) => Match::Ignore(f(t)), + Match::Whitelist(t) => Match::Whitelist(f(t)), + } + } +} diff --git a/ignore/src/overrides.rs b/ignore/src/overrides.rs new file mode 100644 index 000000000..c53a50f72 --- /dev/null +++ b/ignore/src/overrides.rs @@ -0,0 +1,202 @@ +/*! +The overrides module provides a way to specify a set of override globs. +This provides functionality similar to `--include` or `--exclude` in command +line tools. +*/ + +use std::path::Path; + +use gitignore::{self, Gitignore, GitignoreBuilder}; +use {Error, Match}; + +/// Glob represents a single glob in an override matcher. +/// +/// This is used to report information about the highest precedent glob +/// that matched. +/// +/// Note that not all matches necessarily correspond to a specific glob. For +/// example, if there are one or more whitelist globs and a file path doesn't +/// match any glob in the set, then the file path is considered to be ignored. +/// +/// The lifetime `'a` refers to the lifetime of the matcher that produced +/// this glob. +#[derive(Clone, Debug)] +pub struct Glob<'a>(GlobInner<'a>); + +#[derive(Clone, Debug)] +enum GlobInner<'a> { + /// No glob matched, but the file path should still be ignored. + UnmatchedIgnore, + /// A glob matched. + Matched(&'a gitignore::Glob), +} + +impl<'a> Glob<'a> { + fn unmatched() -> Glob<'a> { + Glob(GlobInner::UnmatchedIgnore) + } +} + +/// Manages a set of overrides provided explicitly by the end user. +#[derive(Clone, Debug)] +pub struct Override(Gitignore); + +impl Override { + /// Returns an empty matcher that never matches any file path. + pub fn empty() -> Override { + Override(Gitignore::empty()) + } + + /// Returns the directory of this override set. + /// + /// All matches are done relative to this path. 
+ pub fn path(&self) -> &Path { + self.0.path() + } + + /// Returns true if and only if this matcher is empty. + /// + /// When a matcher is empty, it will never match any file path. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns the total number of ignore globs. + pub fn num_ignores(&self) -> u64 { + self.0.num_whitelists() + } + + /// Returns the total number of whitelisted globs. + pub fn num_whitelists(&self) -> u64 { + self.0.num_ignores() + } + + /// Returns whether the given file path matched a pattern in this override + /// matcher. + /// + /// `is_dir` should be true if the path refers to a directory and false + /// otherwise. + /// + /// If there are no overrides, then this always returns `Match::None`. + /// + /// If there is at least one whitelist override, then this never returns + /// `Match::None`, since non-matches are interpreted as ignored. + /// + /// The given path is matched to the globs relative to the path given + /// when building the override matcher. Specifically, before matching + /// `path`, its prefix (as determined by a common suffix of the directory + /// given) is stripped. If there is no common suffix/prefix overlap, then + /// `path` is assumed to reside in the same directory as the root path for + /// this set of overrides. + pub fn matched<'a, P: AsRef>( + &'a self, + path: P, + is_dir: bool, + ) -> Match> { + if self.is_empty() { + return Match::None; + } + let mat = self.0.matched(path, is_dir).invert(); + if mat.is_none() && self.num_whitelists() > 0 { + return Match::Ignore(Glob::unmatched()); + } + mat.map(move |giglob| Glob(GlobInner::Matched(giglob))) + } +} + +/// Builds a matcher for a set of glob overrides. +pub struct OverrideBuilder { + builder: GitignoreBuilder, +} + +impl OverrideBuilder { + /// Create a new override builder. + /// + /// Matching is done relative to the directory path provided. 
+ pub fn new>(path: P) -> OverrideBuilder { + OverrideBuilder { + builder: GitignoreBuilder::new(path), + } + } + + /// Builds a new override matcher from the globs added so far. + /// + /// Once a matcher is built, no new globs can be added to it. + pub fn build(&self) -> Result { + Ok(Override(try!(self.builder.build()))) + } + + /// Add a glob to the set of overrides. + /// + /// Globs provided here have precisely the same semantics as a single + /// line in a `gitignore` file, where the meaning of `!` is inverted: + /// namely, `!` at the beginning of a glob will ignore a file. Without `!`, + /// all matches of the glob provided are treated as whitelist matches. + pub fn add(&mut self, glob: &str) -> Result<&mut OverrideBuilder, Error> { + try!(self.builder.add_line(None, glob)); + Ok(self) + } +} + +#[cfg(test)] +mod tests { + use super::{Override, OverrideBuilder}; + + const ROOT: &'static str = "/home/andrew/foo"; + + fn ov(globs: &[&str]) -> Override { + let mut builder = OverrideBuilder::new(ROOT); + for glob in globs { + builder.add(glob).unwrap(); + } + builder.build().unwrap() + } + + #[test] + fn empty() { + let ov = ov(&[]); + assert!(ov.matched("a.foo", false).is_none()); + assert!(ov.matched("a", false).is_none()); + assert!(ov.matched("", false).is_none()); + } + + #[test] + fn simple() { + let ov = ov(&["*.foo", "!*.bar"]); + assert!(ov.matched("a.foo", false).is_whitelist()); + assert!(ov.matched("a.foo", true).is_whitelist()); + assert!(ov.matched("a.rs", false).is_ignore()); + assert!(ov.matched("a.rs", true).is_ignore()); + assert!(ov.matched("a.bar", false).is_ignore()); + assert!(ov.matched("a.bar", true).is_ignore()); + } + + #[test] + fn only_ignores() { + let ov = ov(&["!*.bar"]); + assert!(ov.matched("a.rs", false).is_none()); + assert!(ov.matched("a.rs", true).is_none()); + assert!(ov.matched("a.bar", false).is_ignore()); + assert!(ov.matched("a.bar", true).is_ignore()); + } + + #[test] + fn precedence() { + let ov = ov(&["*.foo", 
"!*.bar.foo"]); + assert!(ov.matched("a.foo", false).is_whitelist()); + assert!(ov.matched("a.baz", false).is_ignore()); + assert!(ov.matched("a.bar.foo", false).is_ignore()); + } + + #[test] + fn gitignore() { + let ov = ov(&["/foo", "bar/*.rs", "baz/**"]); + assert!(ov.matched("bar/wat/lib.rs", false).is_ignore()); + assert!(ov.matched("wat/bar/lib.rs", false).is_whitelist()); + assert!(ov.matched("foo", false).is_whitelist()); + assert!(ov.matched("wat/foo", false).is_ignore()); + assert!(ov.matched("baz", false).is_ignore()); + assert!(ov.matched("baz/a", false).is_whitelist()); + assert!(ov.matched("baz/a/b", false).is_whitelist()); + } +} diff --git a/ignore/src/pathutil.rs b/ignore/src/pathutil.rs new file mode 100644 index 000000000..bfd43de3e --- /dev/null +++ b/ignore/src/pathutil.rs @@ -0,0 +1,108 @@ +use std::ffi::OsStr; +use std::path::Path; + +/// Returns true if and only if this file path is considered to be hidden. +#[cfg(unix)] +pub fn is_hidden>(path: P) -> bool { + use std::os::unix::ffi::OsStrExt; + + if let Some(name) = file_name(path.as_ref()) { + name.as_bytes().get(0) == Some(&b'.') + } else { + false + } +} + +/// Returns true if and only if this file path is considered to be hidden. +#[cfg(not(unix))] +pub fn is_hidden>(path: P) -> bool { + if let Some(name) = file_name(path.as_ref()) { + name.to_str().map(|s| s.starts_with(".")).unwrap_or(false) + } else { + false + } +} + +/// Strip `prefix` from the `path` and return the remainder. +/// +/// If `path` doesn't have a prefix `prefix`, then return `None`. 
+#[cfg(unix)] +pub fn strip_prefix<'a, P: AsRef + ?Sized>( + prefix: &'a P, + path: &'a Path, +) -> Option<&'a Path> { + use std::os::unix::ffi::OsStrExt; + + let prefix = prefix.as_ref().as_os_str().as_bytes(); + let path = path.as_os_str().as_bytes(); + if prefix.len() > path.len() || prefix != &path[0..prefix.len()] { + None + } else { + Some(&Path::new(OsStr::from_bytes(&path[prefix.len()..]))) + } +} + +/// Strip `prefix` from the `path` and return the remainder. +/// +/// If `path` doesn't have a prefix `prefix`, then return `None`. +#[cfg(not(unix))] +pub fn strip_prefix<'a, P: AsRef + ?Sized>( + prefix: &'a P, + path: &'a Path, +) -> Option<&'a Path> { + path.strip_prefix(prefix).ok() +} + +/// Returns true if this file path is just a file name, i.e., the path +/// contains no `/` byte. +#[cfg(unix)] +pub fn is_file_name>(path: P) -> bool { + use std::os::unix::ffi::OsStrExt; + use memchr::memchr; + + let path = path.as_ref().as_os_str().as_bytes(); + memchr(b'/', path).is_none() +} + +/// Returns true if this file path is just a file name. i.e., Its parent is +/// the empty string. +#[cfg(not(unix))] +pub fn is_file_name>(path: P) -> bool { + path.as_ref().parent().map(|p| p.as_os_str().is_empty()).unwrap_or(false) +} + +/// The final component of the path, if it is a normal file. +/// +/// If the path is empty or its final byte is `.` (which covers `.` and `..`), +/// file_name will return None. +#[cfg(unix)] +pub fn file_name<'a, P: AsRef + ?Sized>( + path: &'a P, +) -> Option<&'a OsStr> { + use std::os::unix::ffi::OsStrExt; + use memchr::memrchr; + + let path = path.as_ref().as_os_str().as_bytes(); + if path.is_empty() { + return None; + } else if path.len() == 1 && path[0] == b'.' { + return None; + } else if path.last() == Some(&b'.') { + return None; + } else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] 
{ + return None; + } + let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0); + Some(OsStr::from_bytes(&path[last_slash..])) +} + +/// The final component of the path, if it is a normal file. +/// +/// If the path terminates in ., .., or consists solely of a root of prefix, +/// file_name will return None. +#[cfg(not(unix))] +pub fn file_name<'a, P: AsRef + ?Sized>( + path: &'a P, +) -> Option<&'a OsStr> { + path.as_ref().file_name() +} diff --git a/ignore/src/types.rs b/ignore/src/types.rs new file mode 100644 index 000000000..8f4cd1716 --- /dev/null +++ b/ignore/src/types.rs @@ -0,0 +1,568 @@ +/*! +The types module provides a way of associating globs on file names to file +types. + +This can be used to match specific types of files. For example, among +the default file types provided, the Rust file type is defined to be `*.rs` +with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with +name `c`. + +Note that the set of default types may change over time. + +# Example + +This shows how to create and use a simple file type matcher using the default +file types defined in this crate. + +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.select("rust"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("foo.rs", false).is_whitelist()); +assert!(matcher.matched("foo.c", false).is_ignore()); +``` + +# Example: negation + +This is like the previous example, but shows how negating a file type works. +That is, this will let us match file paths that *don't* correspond to a +particular file type. 
+ +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.negate("c"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("foo.rs", false).is_none()); +assert!(matcher.matched("foo.c", false).is_ignore()); +``` + +# Example: custom file type definitions + +This shows how to extend this library's default file type definitions with +your own. + +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.add("foo", "*.foo"); +// Another way of adding a file type definition. +// This is useful when accepting input from an end user. +builder.add_def("bar:*.bar"); +// Note: we only select `foo`, not `bar`. +builder.select("foo"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("x.foo", false).is_whitelist()); +// This is ignored because we only selected the `foo` file type. +assert!(matcher.matched("x.bar", false).is_ignore()); +``` +*/ + +use std::cell::RefCell; +use std::collections::HashMap; +use std::path::Path; +use std::sync::Arc; + +use globset::{GlobBuilder, GlobSet, GlobSetBuilder}; +use thread_local::ThreadLocal; + +use pathutil::file_name; +use {Error, Match}; + +const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[ + ("asm", &["*.asm", "*.s", "*.S"]), + ("awk", &["*.awk"]), + ("c", &["*.c", "*.h", "*.H"]), + ("cbor", &["*.cbor"]), + ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]), + ("cmake", &["*.cmake", "CMakeLists.txt"]), + ("coffeescript", &["*.coffee"]), + ("config", &["*.config"]), + ("cpp", &[ + "*.C", "*.cc", "*.cpp", "*.cxx", + "*.h", "*.H", "*.hh", "*.hpp", + ]), + ("csharp", &["*.cs"]), + ("css", &["*.css"]), + ("cython", &["*.pyx"]), + ("dart", &["*.dart"]), + ("d", &["*.d"]), + ("elisp", &["*.el"]), + ("erlang", &["*.erl", "*.hrl"]), + ("fortran", &[ + "*.f", "*.F", "*.f77", "*.F77", "*.pfo", + "*.f90", "*.F90", "*.f95", "*.F95", + ]), + ("fsharp", &["*.fs", "*.fsx", 
"*.fsi"]), + ("go", &["*.go"]), + ("groovy", &["*.groovy", "*.gradle"]), + ("hbs", &["*.hbs"]), + ("haskell", &["*.hs", "*.lhs"]), + ("html", &["*.htm", "*.html"]), + ("java", &["*.java"]), + ("jinja", &["*.jinja", "*.jinja2"]), + ("js", &[ + "*.js", "*.jsx", "*.vue", + ]), + ("json", &["*.json"]), + ("jsonl", &["*.jsonl"]), + ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]), + ("lua", &["*.lua"]), + ("m4", &["*.ac", "*.m4"]), + ("make", &["gnumakefile", "Gnumakefile", "makefile", "Makefile", "*.mk"]), + ("markdown", &["*.md"]), + ("md", &["*.md"]), + ("matlab", &["*.m"]), + ("mk", &["mkfile"]), + ("ml", &["*.ml"]), + ("nim", &["*.nim"]), + ("objc", &["*.h", "*.m"]), + ("objcpp", &["*.h", "*.mm"]), + ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]), + ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]), + ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]), + ("py", &["*.py"]), + ("readme", &["README*", "*README"]), + ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]), + ("rst", &["*.rst"]), + ("ruby", &["*.rb"]), + ("rust", &["*.rs"]), + ("scala", &["*.scala"]), + ("sh", &["*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh"]), + ("spark", &["*.spark"]), + ("sql", &["*.sql"]), + ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]), + ("swift", &["*.swift"]), + ("tcl", &["*.tcl"]), + ("tex", &["*.tex", "*.cls", "*.sty"]), + ("ts", &["*.ts", "*.tsx"]), + ("txt", &["*.txt"]), + ("toml", &["*.toml", "Cargo.lock"]), + ("vala", &["*.vala"]), + ("vb", &["*.vb"]), + ("vimscript", &["*.vim"]), + ("xml", &["*.xml"]), + ("yacc", &["*.y"]), + ("yaml", &["*.yaml", "*.yml"]), +]; + +/// Glob represents a single glob in a set of file type definitions. +/// +/// There may be more than one glob for a particular file type. +/// +/// This is used to report information about the highest precedent glob +/// that matched. +/// +/// Note that not all matches necessarily correspond to a specific glob. 
+/// For example, if there are one or more selections and a file path doesn't +/// match any of those selections, then the file path is considered to be +/// ignored. +/// +/// The lifetime `'a` refers to the lifetime of the underlying file type +/// definition, which corresponds to the lifetime of the file type matcher. +#[derive(Clone, Debug)] +pub struct Glob<'a>(GlobInner<'a>); + +#[derive(Clone, Debug)] +enum GlobInner<'a> { + /// No glob matched, but the file path should still be ignored. + UnmatchedIgnore, + /// A glob matched. + Matched { + /// The file type definition which provided the glob. + def: &'a FileTypeDef, + /// The index of the glob that matched inside the file type definition. + which: usize, + /// Whether the selection was negated or not. + negated: bool, + } +} + +impl<'a> Glob<'a> { + fn unmatched() -> Glob<'a> { + Glob(GlobInner::UnmatchedIgnore) + } +} + +/// A single file type definition. +/// +/// File type definitions can be retrieved in aggregate from a file type +/// matcher. File type definitions are also reported when they are responsible +/// for a match. +#[derive(Clone, Debug)] +pub struct FileTypeDef { + name: String, + globs: Vec, +} + +impl FileTypeDef { + /// Return the name of this file type. + pub fn name(&self) -> &str { + &self.name + } + + /// Return the globs used to recognize this file type. + pub fn globs(&self) -> &[String] { + &self.globs + } +} + +/// Types is a file type matcher. +#[derive(Clone, Debug)] +pub struct Types { + /// All of the file type definitions, sorted lexicographically by name. + defs: Vec, + /// All of the selections made by the user. + selections: Vec>, + /// Whether there is at least one Selection::Select in our selections. + /// When this is true, a Match::None is converted to Match::Ignore. + has_selected: bool, + /// A mapping from glob index in the set to two indices. 
The first is an + /// index into `selections` and the second is an index into the + /// corresponding file type definition's list of globs. + glob_to_selection: Vec<(usize, usize)>, + /// The set of all glob selections, used for actual matching. + set: GlobSet, + /// Temporary storage for globs that match. + matches: Arc>>>, +} + +/// Indicates the type of a selection for a particular file type. +#[derive(Clone, Debug)] +enum Selection { + Select(String, T), + Negate(String, T), +} + +impl Selection { + fn is_negated(&self) -> bool { + match *self { + Selection::Select(..) => false, + Selection::Negate(..) => true, + } + } + + fn name(&self) -> &str { + match *self { + Selection::Select(ref name, _) => name, + Selection::Negate(ref name, _) => name, + } + } + + fn map U>(self, f: F) -> Selection { + match self { + Selection::Select(name, inner) => { + Selection::Select(name, f(inner)) + } + Selection::Negate(name, inner) => { + Selection::Negate(name, f(inner)) + } + } + } + + fn inner(&self) -> &T { + match *self { + Selection::Select(_, ref inner) => inner, + Selection::Negate(_, ref inner) => inner, + } + } +} + +impl Types { + /// Creates a new file type matcher that never matches any path and + /// contains no file type definitions. + pub fn empty() -> Types { + Types { + defs: vec![], + selections: vec![], + has_selected: false, + glob_to_selection: vec![], + set: GlobSetBuilder::new().build().unwrap(), + matches: Arc::new(ThreadLocal::default()), + } + } + + /// Returns true if and only if this matcher has zero selections. + pub fn is_empty(&self) -> bool { + self.selections.is_empty() + } + + /// Returns the number of selections used in this matcher. + pub fn len(&self) -> usize { + self.selections.len() + } + + /// Return the set of current file type definitions. + /// + /// Definitions and globs are sorted. + pub fn definitions(&self) -> &[FileTypeDef] { + &self.defs + } + + /// Returns a match for the given path against this file type matcher. 
+ /// + /// The path is considered whitelisted if it matches a selected file type. + /// The path is considered ignored if it matches a negated file type. + /// If at least one file type is selected and `path` doesn't match, then + /// the path is also considered ignored. + pub fn matched<'a, P: AsRef>( + &'a self, + path: P, + is_dir: bool, + ) -> Match> { + // File types don't apply to directories, and we can't do anything + // if our glob set is empty. + if is_dir || self.set.is_empty() { + return Match::None; + } + // We only want to match against the file name, so extract it. + // If one doesn't exist, then we can't match it. + let name = match file_name(path.as_ref()) { + Some(name) => name, + None if self.has_selected => { + return Match::Ignore(Glob::unmatched()); + } + None => { + return Match::None; + } + }; + let mut matches = self.matches.get_default().borrow_mut(); + self.set.matches_into(name, &mut *matches); + // The highest precedent match is the last one. + if let Some(&i) = matches.last() { + let (isel, iglob) = self.glob_to_selection[i]; + let sel = &self.selections[isel]; + let glob = Glob(GlobInner::Matched { + def: sel.inner(), + which: iglob, + negated: sel.is_negated(), + }); + return if sel.is_negated() { + Match::Ignore(glob) + } else { + Match::Whitelist(glob) + }; + } + if self.has_selected { + Match::Ignore(Glob::unmatched()) + } else { + Match::None + } + } +} + +/// TypesBuilder builds a type matcher from a set of file type definitions and +/// a set of file type selections. +pub struct TypesBuilder { + types: HashMap, + selections: Vec>, +} + +impl TypesBuilder { + /// Create a new builder for a file type matcher. + /// + /// The builder contains *no* type definitions to start with. A set + /// of default type definitions can be added with `add_defaults`, and + /// additional type definitions can be added with `add` and `add_def`.
+ pub fn new() -> TypesBuilder { + TypesBuilder { + types: HashMap::new(), + selections: vec![], + } + } + + /// Build the current set of file type definitions *and* selections into + /// a file type matcher. + pub fn build(&self) -> Result { + let defs = self.definitions(); + let has_selected = self.selections.iter().any(|s| !s.is_negated()); + + let mut selections = vec![]; + let mut glob_to_selection = vec![]; + let mut build_set = GlobSetBuilder::new(); + for (isel, selection) in self.selections.iter().enumerate() { + let def = match self.types.get(selection.name()) { + Some(def) => def.clone(), + None => { + let name = selection.name().to_string(); + return Err(Error::UnrecognizedFileType(name)); + } + }; + for (iglob, glob) in def.globs.iter().enumerate() { + build_set.add(try!( + GlobBuilder::new(glob) + .literal_separator(true) + .build() + .map_err(|err| Error::Glob(err.to_string())))); + glob_to_selection.push((isel, iglob)); + } + selections.push(selection.clone().map(move |_| def)); + } + let set = try!(build_set.build().map_err(|err| { + Error::Glob(err.to_string()) + })); + Ok(Types { + defs: defs, + selections: selections, + has_selected: has_selected, + glob_to_selection: glob_to_selection, + set: set, + matches: Arc::new(ThreadLocal::default()), + }) + } + + /// Return the set of current file type definitions. + /// + /// Definitions and globs are sorted. + pub fn definitions(&self) -> Vec { + let mut defs = vec![]; + for def in self.types.values() { + let mut def = def.clone(); + def.globs.sort(); + defs.push(def); + } + defs.sort_by(|def1, def2| def1.name().cmp(def2.name())); + defs + } + + /// Select the file type given by `name`. + /// + /// If `name` is `all`, then all file types currently defined are selected. 
+ pub fn select(&mut self, name: &str) -> &mut TypesBuilder { + if name == "all" { + for name in self.types.keys() { + self.selections.push(Selection::Select(name.to_string(), ())); + } + } else { + self.selections.push(Selection::Select(name.to_string(), ())); + } + self + } + + /// Ignore the file type given by `name`. + /// + /// If `name` is `all`, then all file types currently defined are negated. + pub fn negate(&mut self, name: &str) -> &mut TypesBuilder { + if name == "all" { + for name in self.types.keys() { + self.selections.push(Selection::Negate(name.to_string(), ())); + } + } else { + self.selections.push(Selection::Negate(name.to_string(), ())); + } + self + } + + /// Clear any file type definitions for the type name given. + pub fn clear(&mut self, name: &str) -> &mut TypesBuilder { + self.types.remove(name); + self + } + + /// Add a new file type definition. `name` can be arbitrary and `glob` + /// should be a glob recognizing file paths belonging to the `name` type. + /// + /// If `name` is `all` or otherwise contains a `:`, then an error is + /// returned. + pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> { + if name == "all" || name.contains(':') { + return Err(Error::InvalidDefinition); + } + let (key, glob) = (name.to_string(), glob.to_string()); + self.types.entry(key).or_insert_with(|| { + FileTypeDef { name: name.to_string(), globs: vec![] } + }).globs.push(glob); + Ok(()) + } + + /// Add a new file type definition specified in string form. The format + /// is `name:glob`. Names may not include a colon. + pub fn add_def(&mut self, def: &str) -> Result<(), Error> { + let name: String = def.chars().take_while(|&c| c != ':').collect(); + let pat: String = def.chars().skip(name.chars().count() + 1).collect(); + if name.is_empty() || pat.is_empty() { + return Err(Error::InvalidDefinition); + } + self.add(&name, &pat) + } + + /// Add a set of default file type definitions.
+ pub fn add_defaults(&mut self) -> &mut TypesBuilder { + static MSG: &'static str = "adding a default type should never fail"; + for &(name, exts) in DEFAULT_TYPES { + for ext in exts { + self.add(name, ext).expect(MSG); + } + } + self + } +} + +#[cfg(test)] +mod tests { + use super::TypesBuilder; + + macro_rules! matched { + ($name:ident, $types:expr, $sel:expr, $selnot:expr, + $path:expr) => { + matched!($name, $types, $sel, $selnot, $path, true); + }; + (not, $name:ident, $types:expr, $sel:expr, $selnot:expr, + $path:expr) => { + matched!($name, $types, $sel, $selnot, $path, false); + }; + ($name:ident, $types:expr, $sel:expr, $selnot:expr, + $path:expr, $matched:expr) => { + #[test] + fn $name() { + let mut btypes = TypesBuilder::new(); + for tydef in $types { + btypes.add_def(tydef).unwrap(); + } + for sel in $sel { + btypes.select(sel); + } + for selnot in $selnot { + btypes.negate(selnot); + } + let types = btypes.build().unwrap(); + let mat = types.matched($path, false); + assert_eq!($matched, !mat.is_ignore()); + } + }; + } + + fn types() -> Vec<&'static str> { + vec![ + "html:*.html", + "html:*.htm", + "rust:*.rs", + "js:*.js", + "foo:*.{rs,foo}", + ] + } + + matched!(match1, types(), vec!["rust"], vec![], "lib.rs"); + matched!(match2, types(), vec!["html"], vec![], "index.html"); + matched!(match3, types(), vec!["html"], vec![], "index.htm"); + matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs"); + matched!(match5, types(), vec![], vec![], "index.html"); + matched!(match6, types(), vec![], vec!["rust"], "index.html"); + matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo"); + + matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html"); + matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs"); + matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs"); + matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs"); + matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], 
"main.foo"); +} diff --git a/ignore/src/walk.rs b/ignore/src/walk.rs new file mode 100644 index 000000000..0bcc6136f --- /dev/null +++ b/ignore/src/walk.rs @@ -0,0 +1,592 @@ +use std::ffi::OsStr; +use std::fs::{FileType, Metadata}; +use std::io; +use std::path::{Path, PathBuf}; +use std::vec; + +use walkdir::{self, WalkDir, WalkDirIterator}; + +use dir::{Ignore, IgnoreBuilder}; +use gitignore::GitignoreBuilder; +use overrides::Override; +use types::Types; +use {Error, PartialErrorBuilder}; + +/// WalkBuilder builds a recursive directory iterator. +/// +/// The builder supports a large number of configurable options. This includes +/// specific glob overrides, file type matching, toggling whether hidden +/// files are ignored or not, and of course, support for respecting gitignore +/// files. +/// +/// By default, all ignore files found are respected. This includes `.ignore`, +/// `.gitignore`, `.git/info/exclude` and even your global gitignore +/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`. +/// +/// Some standard recursive directory options are also supported, such as +/// limiting the recursive depth or whether to follow symbolic links (disabled +/// by default). +/// +/// # Ignore rules +/// +/// There are many rules that influence whether a particular file or directory +/// is skipped by this iterator. Those rules are documented here. Note that +/// the rules assume a default configuration. +/// +/// * First, glob overrides are checked. If a path matches a glob override, +/// then matching stops. The path is then only skipped if the glob that matched +/// the path is an ignore glob. (An override glob is a whitelist glob unless it +/// starts with a `!`, in which case it is an ignore glob.) +/// * Second, ignore files are checked. 
Ignore files currently only come from +/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured +/// global gitignore file), plain `.ignore` files, which have the same format +/// as gitignore files, or explicitly added ignore files. The precedence order +/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and +/// finally explicitly added ignore files. Note that precedence between +/// different types of ignore files is not impacted by the directory hierarchy; +/// any `.ignore` file overrides all `.gitignore` files. Within each +/// precedence level, more nested ignore files have a higher precedence over +/// less nested ignore files. +/// * Third, if the previous step yields an ignore match, then all matching +/// is stopped and the path is skipped. If it yields a whitelist match, then +/// matching continues. A whitelist match can be overridden by a later matcher. +/// * Fourth, unless the path is a directory, the file type matcher is run on +/// the path. As above, if it's an ignore match, then all matching is stopped +/// and the path is skipped. If it's a whitelist match, then matching +/// continues. +/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the +/// path is skipped. +/// * Sixth, if the path has made it this far then it is yielded in the +/// iterator. +pub struct WalkBuilder { + paths: Vec, + ig_builder: IgnoreBuilder, + parents: bool, + max_depth: Option, + follow_links: bool, +} + +impl WalkBuilder { + /// Create a new builder for a recursive directory iterator for the + /// directory given. + /// + /// Note that if you want to traverse multiple different directories, it + /// is better to call `add` on this builder than to create multiple + /// `Walk` values.
+ pub fn new>(path: P) -> WalkBuilder { + WalkBuilder { + paths: vec![path.as_ref().to_path_buf()], + ig_builder: IgnoreBuilder::new(), + parents: true, + max_depth: None, + follow_links: false, + } + } + + /// Build a new `Walk` iterator. + pub fn build(&self) -> Walk { + let follow_links = self.follow_links; + let max_depth = self.max_depth; + let its = self.paths.iter().map(move |p| { + if p == Path::new("-") { + (p.to_path_buf(), None) + } else { + let mut wd = WalkDir::new(p); + wd = wd.follow_links(follow_links || p.is_file()); + if let Some(max_depth) = max_depth { + wd = wd.max_depth(max_depth); + } + (p.to_path_buf(), Some(WalkEventIter::from(wd))) + } + }).collect::>().into_iter(); + let ig_root = self.ig_builder.build(); + Walk { + its: its, + it: None, + ig_root: ig_root.clone(), + ig: ig_root.clone(), + parents: self.parents, + } + } + + /// Add a file path to the iterator. + /// + /// Each additional file path added is traversed recursively. This should + /// be preferred over building multiple `Walk` iterators since this + /// enables reusing resources across iteration. + pub fn add>(&mut self, path: P) -> &mut WalkBuilder { + self.paths.push(path.as_ref().to_path_buf()); + self + } + + /// The maximum depth to recurse. + /// + /// The default, `None`, imposes no depth restriction. + pub fn max_depth(&mut self, depth: Option) -> &mut WalkBuilder { + self.max_depth = depth; + self + } + + /// Whether to follow symbolic links or not. + pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder { + self.follow_links = yes; + self + } + + /// Add an ignore file to the matcher. + /// + /// This has lower precedence than all other sources of ignore rules. + /// + /// If there was a problem adding the ignore file, then an error is + /// returned. Note that the error may indicate *partial* failure. For + /// example, if an ignore file contains an invalid glob, all other globs + /// are still applied. 
+ pub fn add_ignore>(&mut self, path: P) -> Option { + let mut builder = GitignoreBuilder::new(""); + let mut errs = PartialErrorBuilder::default(); + errs.maybe_push_ignore_io(builder.add(path)); + match builder.build() { + Ok(gi) => { self.ig_builder.add_ignore(gi); } + Err(err) => { errs.push(err); } + } + errs.into_error_option() + } + + /// Add an override matcher. + /// + /// By default, no override matcher is used. + /// + /// This overrides any previous setting. + pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder { + self.ig_builder.overrides(overrides); + self + } + + /// Add a file type matcher. + /// + /// By default, no file type matcher is used. + /// + /// This overrides any previous setting. + pub fn types(&mut self, types: Types) -> &mut WalkBuilder { + self.ig_builder.types(types); + self + } + + /// Enables ignoring hidden files. + /// + /// This is enabled by default. + pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.hidden(yes); + self + } + + /// Enables reading ignore files from parent directories. + /// + /// If this is enabled, then the parent directories of each file path given + /// are traversed for ignore files (subject to the ignore settings on + /// this builder). Note that file paths are canonicalized with respect to + /// the current working directory in order to determine parent directories. + /// + /// This is enabled by default. + pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder { + self.parents = yes; + self + } + + /// Enables reading `.ignore` files. + /// + /// `.ignore` files have the same semantics as `gitignore` files and are + /// supported by search tools such as ripgrep and The Silver Searcher. + /// + /// This is enabled by default. + pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.ignore(yes); + self + } + + /// Enables reading a global gitignore file, whose path is specified in + /// git's `core.excludesFile` config option. 
+ /// + /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig` + /// does not exist or does not specify `core.excludesFile`, then + /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not + /// set or is empty, then `$HOME/.config/git/ignore` is used instead. + pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_global(yes); + self + } + + /// Enables reading `.gitignore` files. + /// + /// `.gitignore` files have match semantics as described in the `gitignore` + /// man page. + /// + /// This is enabled by default. + pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_ignore(yes); + self + } + + /// Enables reading `.git/info/exclude` files. + /// + /// `.git/info/exclude` files have match semantics as described in the + /// `gitignore` man page. + /// + /// This is enabled by default. + pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_exclude(yes); + self + } +} + +/// Walk is a recursive directory iterator over file paths in a directory. +/// +/// Only file and directory paths matching the rules are returned. By default, +/// ignore files like `.gitignore` are respected. The precise matching rules +/// and precedence is explained in the documentation for `WalkBuilder`. +pub struct Walk { + its: vec::IntoIter<(PathBuf, Option)>, + it: Option, + ig_root: Ignore, + ig: Ignore, + parents: bool, +} + +impl Walk { + /// Creates a new recursive directory iterator for the file path given. + /// + /// Note that this uses default settings, which include respecting + /// `.gitignore` files. To configure the iterator, use `WalkBuilder` + /// instead. + pub fn new>(path: P) -> Walk { + WalkBuilder::new(path).build() + } + + fn skip_entry(&self, ent: &walkdir::DirEntry) -> bool { + if ent.depth() == 0 { + // Never skip the root directory. 
+ return false; + } + let m = self.ig.matched(ent.path(), ent.file_type().is_dir()); + if m.is_ignore() { + debug!("ignoring {}: {:?}", ent.path().display(), m); + return true; + } else if m.is_whitelist() { + debug!("whitelisting {}: {:?}", ent.path().display(), m); + } + false + } +} + +impl Iterator for Walk { + type Item = Result; + + #[inline(always)] + fn next(&mut self) -> Option> { + loop { + let ev = match self.it.as_mut().and_then(|it| it.next()) { + Some(ev) => ev, + None => { + match self.its.next() { + None => return None, + Some((_, None)) => { + return Some(Ok(DirEntry { + dent: None, + err: None, + })); + } + Some((path, Some(it))) => { + self.it = Some(it); + if self.parents && path.is_dir() { + let (ig, err) = self.ig_root.add_parents(path); + self.ig = ig; + if let Some(err) = err { + return Some(Err(err)); + } + } else { + self.ig = self.ig_root.clone(); + } + } + } + continue; + } + }; + match ev { + Err(err) => { + let path = err.path().map(|p| p.to_path_buf()); + let mut ig_err = Error::Io(io::Error::from(err)); + if let Some(path) = path { + ig_err = Error::WithPath { + path: path.to_path_buf(), + err: Box::new(ig_err), + }; + } + return Some(Err(ig_err)); + } + Ok(WalkEvent::Exit) => { + self.ig = self.ig.parent().unwrap(); + } + Ok(WalkEvent::Dir(ent)) => { + if self.skip_entry(&ent) { + self.it.as_mut().unwrap().it.skip_current_dir(); + // Still need to push this on the stack because + // we'll get a WalkEvent::Exit event for this dir. + // We don't care if it errors though. + let (igtmp, _) = self.ig.add_child(ent.path()); + self.ig = igtmp; + continue; + } + let (igtmp, err) = self.ig.add_child(ent.path()); + self.ig = igtmp; + return Some(Ok(DirEntry { dent: Some(ent), err: err })); + } + Ok(WalkEvent::File(ent)) => { + if self.skip_entry(&ent) { + continue; + } + // If this isn't actually a file (e.g., a symlink), + // then skip it. 
+ if !ent.file_type().is_file() { + continue; + } + return Some(Ok(DirEntry { dent: Some(ent), err: None })); + } + } + } + } +} + +/// A directory entry with a possible error attached. +/// +/// The error typically refers to a problem parsing ignore files in a +/// particular directory. +#[derive(Debug)] +pub struct DirEntry { + dent: Option, + err: Option, +} + +impl DirEntry { + /// The full path that this entry represents. + pub fn path(&self) -> &Path { + self.dent.as_ref().map_or(Path::new(""), |x| x.path()) + } + + /// Whether this entry corresponds to a symbolic link or not. + pub fn path_is_symbolic_link(&self) -> bool { + self.dent.as_ref().map_or(false, |x| x.path_is_symbolic_link()) + } + + /// Returns true if and only if this entry corresponds to stdin. + /// + /// i.e., The entry has depth 0 and its file name is `-`. + pub fn is_stdin(&self) -> bool { + self.dent.is_none() + } + + /// Return the metadata for the file that this entry points to. + pub fn metadata(&self) -> Result { + if let Some(dent) = self.dent.as_ref() { + dent.metadata().map_err(|err| Error::WithPath { + path: self.path().to_path_buf(), + err: Box::new(Error::Io(io::Error::from(err))), + }) + } else { + let ioerr = io::Error::new( + io::ErrorKind::Other, "stdin has no metadata"); + Err(Error::WithPath { + path: Path::new("").to_path_buf(), + err: Box::new(Error::Io(ioerr)), + }) + } + } + + /// Return the file type for the file that this entry points to. + /// + /// This entry doesn't have a file type if it corresponds to stdin. + pub fn file_type(&self) -> Option { + self.dent.as_ref().map(|x| x.file_type()) + } + + /// Return the file name of this entry. + /// + /// If this entry has no file name (e.g., `/`), then the full path is + /// returned. + pub fn file_name(&self) -> &OsStr { + self.dent.as_ref().map_or(OsStr::new(""), |x| x.file_name()) + } + + /// Returns the depth at which this entry was created relative to the root. 
+ pub fn depth(&self) -> usize { + self.dent.as_ref().map_or(0, |x| x.depth()) + } + + /// Returns an error, if one exists, associated with processing this entry. + /// + /// An example of an error is one that occurred while parsing an ignore + /// file. + pub fn error(&self) -> Option<&Error> { + self.err.as_ref() + } +} + +/// WalkEventIter transforms a WalkDir iterator into an iterator that more +/// accurately describes the directory tree. Namely, it emits events that are +/// one of three types: directory, file or "exit." An "exit" event means that +/// the entire contents of a directory have been enumerated. +struct WalkEventIter { + depth: usize, + it: walkdir::Iter, + next: Option>, +} + +#[derive(Debug)] +enum WalkEvent { + Dir(walkdir::DirEntry), + File(walkdir::DirEntry), + Exit, +} + +impl From for WalkEventIter { + fn from(it: WalkDir) -> WalkEventIter { + WalkEventIter { depth: 0, it: it.into_iter(), next: None } + } +} + +impl Iterator for WalkEventIter { + type Item = walkdir::Result; + + #[inline(always)] + fn next(&mut self) -> Option> { + let dent = self.next.take().or_else(|| self.it.next()); + let depth = match dent { + None => 0, + Some(Ok(ref dent)) => dent.depth(), + Some(Err(ref err)) => err.depth(), + }; + if depth < self.depth { + self.depth -= 1; + self.next = dent; + return Some(Ok(WalkEvent::Exit)); + } + self.depth = depth; + match dent { + None => None, + Some(Err(err)) => Some(Err(err)), + Some(Ok(dent)) => { + if dent.file_type().is_dir() { + self.depth += 1; + Some(Ok(WalkEvent::Dir(dent))) + } else { + Some(Ok(WalkEvent::File(dent))) + } + } + } + } +} + +#[cfg(test)] +mod tests { + use std::fs::{self, File}; + use std::io::Write; + use std::path::Path; + + use tempdir::TempDir; + + use super::{Walk, WalkBuilder}; + + fn wfile>(path: P, contents: &str) { + let mut file = File::create(path).unwrap(); + file.write_all(contents.as_bytes()).unwrap(); + } + + fn mkdirp>(path: P) { + fs::create_dir_all(path).unwrap(); + } + + fn 
normal_path(unix: &str) -> String { + if cfg!(windows) { + unix.replace("\\", "/") + } else { + unix.to_string() + } + } + + fn walk_collect(prefix: &Path, walk: Walk) -> Vec { + let mut paths = vec![]; + for dent in walk { + let dent = dent.unwrap(); + let path = dent.path().strip_prefix(prefix).unwrap(); + if path.as_os_str().is_empty() { + continue; + } + paths.push(normal_path(path.to_str().unwrap())); + } + paths.sort(); + paths + } + + fn mkpaths(paths: &[&str]) -> Vec { + let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect(); + paths.sort(); + paths + } + + #[test] + fn no_ignores() { + let td = TempDir::new("walk-test-").unwrap(); + mkdirp(td.path().join("a/b/c")); + mkdirp(td.path().join("x/y")); + wfile(td.path().join("a/b/foo"), ""); + wfile(td.path().join("x/y/foo"), ""); + + let got = walk_collect(td.path(), Walk::new(td.path())); + assert_eq!(got, mkpaths(&[ + "x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c", + ])); + } + + #[test] + fn gitignore() { + let td = TempDir::new("walk-test-").unwrap(); + mkdirp(td.path().join("a")); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("bar"), ""); + wfile(td.path().join("a/bar"), ""); + + let got = walk_collect(td.path(), Walk::new(td.path())); + assert_eq!(got, mkpaths(&["bar", "a", "a/bar"])); + } + + #[test] + fn explicit_ignore() { + let td = TempDir::new("walk-test-").unwrap(); + let igpath = td.path().join(".not-an-ignore"); + mkdirp(td.path().join("a")); + wfile(&igpath, "foo"); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("bar"), ""); + wfile(td.path().join("a/bar"), ""); + + let mut builder = WalkBuilder::new(td.path()); + assert!(builder.add_ignore(&igpath).is_none()); + let got = walk_collect(td.path(), builder.build()); + assert_eq!(got, mkpaths(&["bar", "a", "a/bar"])); + } + + #[test] + fn gitignore_parent() { + let td = 
TempDir::new("walk-test-").unwrap(); + mkdirp(td.path().join("a")); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("a/bar"), ""); + + let root = td.path().join("a"); + let got = walk_collect(&root, Walk::new(&root)); + assert_eq!(got, mkpaths(&["bar"])); + } +} diff --git a/src/args.rs b/src/args.rs index 012d91502..9d2923b87 100644 --- a/src/args.rs +++ b/src/args.rs @@ -14,19 +14,17 @@ use term::Terminal; use term; #[cfg(windows)] use term::WinConsole; -use walkdir::WalkDir; use atty; -use gitignore::{Gitignore, GitignoreBuilder}; -use ignore::Ignore; +use ignore::overrides::{Override, OverrideBuilder}; +use ignore::types::{FileTypeDef, Types, TypesBuilder}; +use ignore; use out::{Out, ColoredTerminal}; use printer::Printer; use search_buffer::BufferSearcher; use search_stream::{InputBuffer, Searcher}; #[cfg(windows)] use terminal_win::WindowsBuffer; -use types::{FileTypeDef, Types, TypesBuilder}; -use walk; use Result; @@ -131,6 +129,13 @@ Less common options: Search hidden directories and files. (Hidden directories and files are skipped by default.) + --ignore-file FILE ... + Specify additional ignore files for filtering file paths. Ignore files + should be in the gitignore format and are matched relative to the + current working directory. These ignore files have lower precedence + than all other ignore file types. When specifying multiple ignore + files, earlier files have lower precedence than later files. + -L, --follow Follow symlinks. 
@@ -234,6 +239,7 @@ pub struct RawArgs { flag_heading: bool, flag_hidden: bool, flag_ignore_case: bool, + flag_ignore_file: Vec, flag_invert_match: bool, flag_line_number: bool, flag_fixed_strings: bool, @@ -279,11 +285,12 @@ pub struct Args { eol: u8, files: bool, follow: bool, - glob_overrides: Option, + glob_overrides: Override, grep: Grep, heading: bool, hidden: bool, ignore_case: bool, + ignore_files: Vec, invert_match: bool, line_number: bool, line_per_match: bool, @@ -347,14 +354,13 @@ impl RawArgs { } let glob_overrides = if self.flag_glob.is_empty() { - None + Override::empty() } else { - let cwd = try!(env::current_dir()); - let mut bgi = GitignoreBuilder::new(cwd); + let mut ovr = OverrideBuilder::new(try!(env::current_dir())); for pat in &self.flag_glob { - try!(bgi.add("", pat)); + try!(ovr.add(pat)); } - Some(try!(bgi.build())) + try!(ovr.build()) }; let threads = if self.flag_threads == 0 { @@ -382,6 +388,9 @@ impl RawArgs { let no_ignore = self.flag_no_ignore || self.flag_unrestricted >= 1; let hidden = self.flag_hidden || self.flag_unrestricted >= 2; let text = self.flag_text || self.flag_unrestricted >= 3; + let ignore_files: Vec<_> = self.flag_ignore_file.iter().map(|p| { + Path::new(p).to_path_buf() + }).collect(); let mut args = Args { paths: paths, after_context: after_context, @@ -399,6 +408,7 @@ impl RawArgs { heading: !self.flag_no_heading && self.flag_heading, hidden: hidden, ignore_case: self.flag_ignore_case, + ignore_files: ignore_files, invert_match: self.flag_invert_match, line_number: !self.flag_no_line_number && self.flag_line_number, line_per_match: self.flag_vimgrep, @@ -711,31 +721,30 @@ impl Args { self.type_list } - /// Create a new recursive directory iterator at the path given. - pub fn walker(&self, path: &Path) -> Result { - // Always follow symlinks for explicitly specified files. 
- let mut wd = WalkDir::new(path).follow_links( - self.follow || path.is_file()); - if let Some(maxdepth) = self.maxdepth { - wd = wd.max_depth(maxdepth); + /// Create a new recursive directory iterator over the paths in argv. + pub fn walker(&self) -> Walk { + let paths = self.paths(); + let mut wd = ignore::WalkBuilder::new(&paths[0]); + for path in &paths[1..] { + wd.add(path); } - let mut ig = Ignore::new(); - // Only register ignore rules if this is a directory. If it's a file, - // then it was explicitly given by the end user, so we always search - // it. - if path.is_dir() { - ig.ignore_hidden(!self.hidden); - ig.no_ignore(self.no_ignore); - ig.no_ignore_vcs(self.no_ignore_vcs); - ig.add_types(self.types.clone()); - if !self.no_ignore_parent { - try!(ig.push_parents(path)); - } - if let Some(ref overrides) = self.glob_overrides { - ig.add_override(overrides.clone()); + for path in &self.ignore_files { + if let Some(err) = wd.add_ignore(path) { + eprintln!("{}", err); } } - Ok(walk::Iter::new(ig, wd)) + + wd.follow_links(self.follow); + wd.hidden(!self.hidden); + wd.max_depth(self.maxdepth); + wd.overrides(self.glob_overrides.clone()); + wd.types(self.types.clone()); + wd.git_global(!self.no_ignore && !self.no_ignore_vcs); + wd.git_ignore(!self.no_ignore && !self.no_ignore_vcs); + wd.git_exclude(!self.no_ignore && !self.no_ignore_vcs); + wd.ignore(!self.no_ignore); + wd.parents(!self.no_ignore_parent); + Walk(wd.build()) } } @@ -752,6 +761,34 @@ fn version() -> String { } } +/// A simple wrapper around the ignore::Walk iterator. This will +/// automatically emit error messages to stderr and will skip directories. 
+pub struct Walk(ignore::Walk); + +impl Iterator for Walk { + type Item = ignore::DirEntry; + + fn next(&mut self) -> Option { + while let Some(result) = self.0.next() { + match result { + Ok(dent) => { + if let Some(err) = dent.error() { + eprintln!("{}", err); + } + if dent.file_type().map_or(false, |x| x.is_dir()) { + continue; + } + return Some(dent); + } + Err(err) => { + eprintln!("{}", err); + } + } + } + None + } +} + /// A single state in the state machine used by `unescape`. #[derive(Clone, Copy, Eq, PartialEq)] enum State { @@ -761,7 +798,7 @@ enum State { Literal, } -/// Unescapes a string given on the command line. It supports a limit set of +/// Unescapes a string given on the command line. It supports a limited set of /// escape sequences: /// /// * \t, \r and \n are mapped to their corresponding ASCII bytes. diff --git a/src/gitignore.rs b/src/gitignore.rs deleted file mode 100644 index 9daeb3cb6..000000000 --- a/src/gitignore.rs +++ /dev/null @@ -1,455 +0,0 @@ -/*! -The gitignore module provides a way of reading a gitignore file and applying -it to a particular file name to determine whether it should be ignore or not. -The motivation for this submodule is performance and portability: - -1. There is a gitignore crate on crates.io, but it uses the standard `glob` - crate and checks patterns one-by-one. This is a reasonable implementation, - but not suitable for the performance we need here. -2. We could shell out to a `git` sub-command like ls-files or status, but it - seems better to not rely on the existence of external programs for a search - tool. Besides, we need to implement this logic anyway to support things like - an .ignore file. - -The key implementation detail here is that a single gitignore file is compiled -into a single RegexSet, which can be used to report which globs match a -particular file name. 
We can then do a quick post-processing step to implement -additional rules such as whitelists (prefix of `!`) or directory-only globs -(suffix of `/`). -*/ - -// TODO(burntsushi): Implement something similar, but for Mercurial. We can't -// use this exact implementation because hgignore files are different. - -use std::cell::RefCell; -use std::error::Error as StdError; -use std::fmt; -use std::fs::File; -use std::io::{self, BufRead}; -use std::path::{Path, PathBuf}; -use std::sync::Arc; - -use globset::{self, Candidate, GlobBuilder, GlobSet, GlobSetBuilder}; -use regex; -use thread_local::ThreadLocal; - -use pathutil::{is_file_name, strip_prefix}; - -/// Represents an error that can occur when parsing a gitignore file. -#[derive(Debug)] -pub enum Error { - Glob(globset::Error), - Regex(regex::Error), - Io(io::Error), -} - -impl StdError for Error { - fn description(&self) -> &str { - match *self { - Error::Glob(ref err) => err.description(), - Error::Regex(ref err) => err.description(), - Error::Io(ref err) => err.description(), - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::Glob(ref err) => err.fmt(f), - Error::Regex(ref err) => err.fmt(f), - Error::Io(ref err) => err.fmt(f), - } - } -} - -impl From for Error { - fn from(err: globset::Error) -> Error { - Error::Glob(err) - } -} - -impl From for Error { - fn from(err: regex::Error) -> Error { - Error::Regex(err) - } -} - -impl From for Error { - fn from(err: io::Error) -> Error { - Error::Io(err) - } -} - -/// Gitignore is a matcher for the glob patterns in a single gitignore file. -#[derive(Clone, Debug)] -pub struct Gitignore { - set: GlobSet, - root: PathBuf, - patterns: Vec, - num_ignores: u64, - num_whitelist: u64, - matches: Arc>>>, -} - -impl Gitignore { - /// Create a new gitignore glob matcher from the given root directory and - /// string containing the contents of a gitignore file. 
- #[allow(dead_code)] - fn from_str>( - root: P, - gitignore: &str, - ) -> Result { - let mut builder = GitignoreBuilder::new(root); - try!(builder.add_str(gitignore)); - builder.build() - } - - /// Returns true if and only if the given file path should be ignored - /// according to the globs in this gitignore. `is_dir` should be true if - /// the path refers to a directory and false otherwise. - /// - /// Before matching path, its prefix (as determined by a common suffix - /// of the directory containing this gitignore) is stripped. If there is - /// no common suffix/prefix overlap, then path is assumed to reside in the - /// same directory as this gitignore file. - pub fn matched>(&self, path: P, is_dir: bool) -> Match { - let mut path = path.as_ref(); - if let Some(p) = strip_prefix("./", path) { - path = p; - } - // Strip any common prefix between the candidate path and the root - // of the gitignore, to make sure we get relative matching right. - // BUT, a file name might not have any directory components to it, - // in which case, we don't want to accidentally strip any part of the - // file name. - if !is_file_name(path) { - if let Some(p) = strip_prefix(&self.root, path) { - path = p; - } - } - if let Some(p) = strip_prefix("/", path) { - path = p; - } - self.matched_stripped(path, is_dir) - } - - /// Like matched, but takes a path that has already been stripped. - pub fn matched_stripped(&self, path: &Path, is_dir: bool) -> Match { - let _matches = self.matches.get_default(); - let mut matches = _matches.borrow_mut(); - let candidate = Candidate::new(path); - self.set.matches_candidate_into(&candidate, &mut *matches); - for &i in matches.iter().rev() { - let pat = &self.patterns[i]; - if !pat.only_dir || is_dir { - return if pat.whitelist { - Match::Whitelist(pat) - } else { - Match::Ignored(pat) - }; - } - } - Match::None - } - - /// Returns the total number of ignore patterns. 
- pub fn num_ignores(&self) -> u64 { - self.num_ignores - } -} - -/// The result of a glob match. -/// -/// The lifetime `'a` refers to the lifetime of the pattern that resulted in -/// a match (whether ignored or whitelisted). -#[derive(Clone, Debug)] -pub enum Match<'a> { - /// The path didn't match any glob in the gitignore file. - None, - /// The last glob matched indicates the path should be ignored. - Ignored(&'a Pattern), - /// The last glob matched indicates the path should be whitelisted. - Whitelist(&'a Pattern), -} - -impl<'a> Match<'a> { - /// Returns true if the match result implies the path should be ignored. - #[allow(dead_code)] - pub fn is_ignored(&self) -> bool { - match *self { - Match::Ignored(_) => true, - Match::None | Match::Whitelist(_) => false, - } - } - - /// Returns true if the match result didn't match any globs. - pub fn is_none(&self) -> bool { - match *self { - Match::None => true, - Match::Ignored(_) | Match::Whitelist(_) => false, - } - } - - /// Inverts the match so that Ignored becomes Whitelisted and Whitelisted - /// becomes Ignored. A non-match remains the same. - pub fn invert(self) -> Match<'a> { - match self { - Match::None => Match::None, - Match::Ignored(pat) => Match::Whitelist(pat), - Match::Whitelist(pat) => Match::Ignored(pat), - } - } -} - -/// GitignoreBuilder constructs a matcher for a single set of globs from a -/// .gitignore file. -pub struct GitignoreBuilder { - builder: GlobSetBuilder, - root: PathBuf, - patterns: Vec, -} - -/// Pattern represents a single pattern in a gitignore file. It doesn't -/// know how to do glob matching directly, but it does store additional -/// options on a pattern, such as whether it's whitelisted. -#[derive(Clone, Debug)] -pub struct Pattern { - /// The file path that this pattern was extracted from (may be empty). - pub from: PathBuf, - /// The original glob pattern string. - pub original: String, - /// The actual glob pattern string used to convert to a regex. 
- pub pat: String, - /// Whether this is a whitelisted pattern or not. - pub whitelist: bool, - /// Whether this pattern should only match directories or not. - pub only_dir: bool, -} - -impl GitignoreBuilder { - /// Create a new builder for a gitignore file. - /// - /// The path given should be the path at which the globs for this gitignore - /// file should be matched. - pub fn new>(root: P) -> GitignoreBuilder { - let root = strip_prefix("./", root.as_ref()).unwrap_or(root.as_ref()); - GitignoreBuilder { - builder: GlobSetBuilder::new(), - root: root.to_path_buf(), - patterns: vec![], - } - } - - /// Builds a new matcher from the glob patterns added so far. - /// - /// Once a matcher is built, no new glob patterns can be added to it. - pub fn build(self) -> Result { - let nignores = self.patterns.iter().filter(|p| !p.whitelist).count(); - let nwhitelist = self.patterns.iter().filter(|p| p.whitelist).count(); - Ok(Gitignore { - set: try!(self.builder.build()), - root: self.root, - patterns: self.patterns, - num_ignores: nignores as u64, - num_whitelist: nwhitelist as u64, - matches: Arc::new(ThreadLocal::default()), - }) - } - - /// Add each pattern line from the file path given. - pub fn add_path>(&mut self, path: P) -> Result<(), Error> { - let rdr = io::BufReader::new(try!(File::open(&path))); - debug!("gitignore: {}", path.as_ref().display()); - for (i, line) in rdr.lines().enumerate() { - let line = match line { - Ok(line) => line, - Err(err) => { - debug!("error reading line {} in {}: {}", - i, path.as_ref().display(), err); - continue; - } - }; - if let Err(err) = self.add(&path, &line) { - debug!("error adding gitignore pattern: '{}': {}", line, err); - } - } - Ok(()) - } - - /// Add each pattern line from the string given. - pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> { - for line in gitignore.lines() { - try!(self.add("", line)); - } - Ok(()) - } - - /// Add a line from a gitignore file to this builder. 
- /// - /// If the line could not be parsed as a glob, then an error is returned. - pub fn add>( - &mut self, - from: P, - mut line: &str, - ) -> Result<(), Error> { - if line.starts_with("#") { - return Ok(()); - } - if !line.ends_with("\\ ") { - line = line.trim_right(); - } - if line.is_empty() { - return Ok(()); - } - let mut pat = Pattern { - from: from.as_ref().to_path_buf(), - original: line.to_string(), - pat: String::new(), - whitelist: false, - only_dir: false, - }; - let mut literal_separator = false; - let has_slash = line.chars().any(|c| c == '/'); - let is_absolute = line.chars().nth(0).unwrap() == '/'; - if line.starts_with("\\!") || line.starts_with("\\#") { - line = &line[1..]; - } else { - if line.starts_with("!") { - pat.whitelist = true; - line = &line[1..]; - } - if line.starts_with("/") { - // `man gitignore` says that if a glob starts with a slash, - // then the glob can only match the beginning of a path - // (relative to the location of gitignore). We achieve this by - // simply banning wildcards from matching /. - literal_separator = true; - line = &line[1..]; - } - } - // If it ends with a slash, then this should only match directories, - // but the slash should otherwise not be used while globbing. - if let Some((i, c)) = line.char_indices().rev().nth(0) { - if c == '/' { - pat.only_dir = true; - line = &line[..i]; - } - } - // If there is a literal slash, then we note that so that globbing - // doesn't let wildcards match slashes. - pat.pat = line.to_string(); - if has_slash { - literal_separator = true; - } - // If there was a leading slash, then this is a pattern that must - // match the entire path name. Otherwise, we should let it match - // anywhere, so use a **/ prefix. - if !is_absolute { - // ... but only if we don't already have a **/ prefix. 
- if !pat.pat.starts_with("**/") { - pat.pat = format!("**/{}", pat.pat); - } - } - // If the pattern ends with `/**`, then we should only match everything - // inside a directory, but not the directory itself. Standard globs - // will match the directory. So we add `/*` to force the issue. - if pat.pat.ends_with("/**") { - pat.pat = format!("{}/*", pat.pat); - } - let parsed = try!( - GlobBuilder::new(&pat.pat) - .literal_separator(literal_separator) - .build()); - self.builder.add(parsed); - self.patterns.push(pat); - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::Gitignore; - - macro_rules! ignored { - ($name:ident, $root:expr, $gi:expr, $path:expr) => { - ignored!($name, $root, $gi, $path, false); - }; - ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => { - #[test] - fn $name() { - let gi = Gitignore::from_str($root, $gi).unwrap(); - assert!(gi.matched($path, $is_dir).is_ignored()); - } - }; - } - - macro_rules! not_ignored { - ($name:ident, $root:expr, $gi:expr, $path:expr) => { - not_ignored!($name, $root, $gi, $path, false); - }; - ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => { - #[test] - fn $name() { - let gi = Gitignore::from_str($root, $gi).unwrap(); - assert!(!gi.matched($path, $is_dir).is_ignored()); - } - }; - } - - const ROOT: &'static str = "/home/foobar/rust/rg"; - - ignored!(ig1, ROOT, "months", "months"); - ignored!(ig2, ROOT, "*.lock", "Cargo.lock"); - ignored!(ig3, ROOT, "*.rs", "src/main.rs"); - ignored!(ig4, ROOT, "src/*.rs", "src/main.rs"); - ignored!(ig5, ROOT, "/*.c", "cat-file.c"); - ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs"); - ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs"); - ignored!(ig8, ROOT, "foo/", "foo", true); - ignored!(ig9, ROOT, "**/foo", "foo"); - ignored!(ig10, ROOT, "**/foo", "src/foo"); - ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar"); - ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz"); - ignored!(ig13, ROOT, "**/foo/bar", "foo/bar"); - ignored!(ig14, 
ROOT, "**/foo/bar", "src/foo/bar"); - ignored!(ig15, ROOT, "abc/**", "abc/x"); - ignored!(ig16, ROOT, "abc/**", "abc/x/y"); - ignored!(ig17, ROOT, "abc/**", "abc/x/y/z"); - ignored!(ig18, ROOT, "a/**/b", "a/b"); - ignored!(ig19, ROOT, "a/**/b", "a/x/b"); - ignored!(ig20, ROOT, "a/**/b", "a/x/y/b"); - ignored!(ig21, ROOT, r"\!xy", "!xy"); - ignored!(ig22, ROOT, r"\#foo", "#foo"); - ignored!(ig23, ROOT, "foo", "./foo"); - ignored!(ig24, ROOT, "target", "grep/target"); - ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock"); - ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz"); - ignored!(ig27, ROOT, "foo/", "xyz/foo", true); - ignored!(ig28, ROOT, "src/*.rs", "src/grep/src/main.rs"); - ignored!(ig29, "./src", "/llvm/", "./src/llvm", true); - ignored!(ig30, ROOT, "node_modules/ ", "node_modules", true); - - not_ignored!(ignot1, ROOT, "amonths", "months"); - not_ignored!(ignot2, ROOT, "monthsa", "months"); - not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs"); - not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c"); - not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs"); - not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs"); - not_ignored!(ignot7, ROOT, "foo/", "foo", false); - not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz"); - not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz"); - not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar"); - not_ignored!(ignot11, ROOT, "#foo", "#foo"); - not_ignored!(ignot12, ROOT, "\n\n\n", "foo"); - not_ignored!(ignot13, ROOT, "foo/**", "foo", true); - not_ignored!( - ignot14, "./third_party/protobuf", "m4/ltoptions.m4", - "./third_party/protobuf/csharp/src/packages/repositories.config"); - - // See: https://github.com/BurntSushi/ripgrep/issues/106 - #[test] - fn regression_106() { - Gitignore::from_str("/", " ").unwrap(); - } -} diff --git a/src/ignore.rs b/src/ignore.rs deleted file mode 100644 index a8cbac1a5..000000000 --- a/src/ignore.rs +++ 
/dev/null @@ -1,493 +0,0 @@ -/*! -The ignore module is responsible for managing the state required to determine -whether a *single* file path should be searched or not. - -In general, there are two ways to ignore a particular file: - -1. Specify an ignore rule in some "global" configuration, such as a - $HOME/.ignore or on the command line. -2. A specific ignore file (like .gitignore) found during directory traversal. - -The `IgnoreDir` type handles ignore patterns for any one particular directory -(including "global" ignore patterns), while the `Ignore` type handles a stack -of `IgnoreDir`s for use during directory traversal. -*/ - -use std::error::Error as StdError; -use std::ffi::OsString; -use std::fmt; -use std::io; -use std::path::{Path, PathBuf}; - -use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern}; -use pathutil::{file_name, is_hidden, strip_prefix}; -use types::Types; - -const IGNORE_NAMES: &'static [&'static str] = &[ - ".gitignore", - ".ignore", - ".rgignore", -]; - -/// Represents an error that can occur when parsing a gitignore file. -#[derive(Debug)] -pub enum Error { - Gitignore(gitignore::Error), - Io { - path: PathBuf, - err: io::Error, - }, -} - -impl Error { - fn from_io>(path: P, err: io::Error) -> Error { - Error::Io { path: path.as_ref().to_path_buf(), err: err } - } -} - -impl StdError for Error { - fn description(&self) -> &str { - match *self { - Error::Gitignore(ref err) => err.description(), - Error::Io { ref err, .. } => err.description(), - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::Gitignore(ref err) => err.fmt(f), - Error::Io { ref path, ref err } => { - write!(f, "{}: {}", path.display(), err) - } - } - } -} - -impl From for Error { - fn from(err: gitignore::Error) -> Error { - Error::Gitignore(err) - } -} - -/// Ignore represents a collection of ignore patterns organized by directory. 
-/// In particular, a stack is maintained, where the top of the stack -/// corresponds to the current directory being searched and the bottom of the -/// stack represents the root of a search. Ignore patterns at the top of the -/// stack take precedence over ignore patterns at the bottom of the stack. -pub struct Ignore { - /// A stack of ignore patterns at each directory level of traversal. - /// A directory that contributes no ignore patterns is `None`. - stack: Vec, - /// A stack of parent directories above the root of the current search. - parent_stack: Vec, - /// A set of override globs that are always checked first. A match (whether - /// it's whitelist or blacklist) trumps anything in stack. - overrides: Overrides, - /// A file type matcher. - types: Types, - /// Whether to ignore hidden files or not. - ignore_hidden: bool, - /// When true, don't look at .gitignore or .ignore files for ignore - /// rules. - no_ignore: bool, - /// When true, don't look at .gitignore files for ignore rules. - no_ignore_vcs: bool, -} - -impl Ignore { - /// Create an empty set of ignore patterns. - pub fn new() -> Ignore { - Ignore { - stack: vec![], - parent_stack: vec![], - overrides: Overrides::new(None), - types: Types::empty(), - ignore_hidden: true, - no_ignore: false, - no_ignore_vcs: true, - } - } - - /// Set whether hidden files/folders should be ignored (defaults to true). - pub fn ignore_hidden(&mut self, yes: bool) -> &mut Ignore { - self.ignore_hidden = yes; - self - } - - /// When set, ignore files are ignored. - pub fn no_ignore(&mut self, yes: bool) -> &mut Ignore { - self.no_ignore = yes; - self - } - - /// When set, VCS ignore files are ignored. - pub fn no_ignore_vcs(&mut self, yes: bool) -> &mut Ignore { - self.no_ignore_vcs = yes; - self - } - - /// Add a set of globs that overrides all other match logic. - pub fn add_override(&mut self, gi: Gitignore) -> &mut Ignore { - self.overrides = Overrides::new(Some(gi)); - self - } - - /// Add a file type matcher. 
The file type matcher has the lowest - /// precedence. - pub fn add_types(&mut self, types: Types) -> &mut Ignore { - self.types = types; - self - } - - /// Push parent directories of `path` on to the stack. - pub fn push_parents>( - &mut self, - path: P, - ) -> Result<(), Error> { - let path = try!(path.as_ref().canonicalize().map_err(|err| { - Error::from_io(path.as_ref(), err) - })); - let mut path = &*path; - let mut saw_git = path.join(".git").is_dir(); - let mut ignore_names = IGNORE_NAMES.to_vec(); - if self.no_ignore_vcs { - ignore_names.retain(|&name| name != ".gitignore"); - } - let mut ignore_dir_results = vec![]; - while let Some(parent) = path.parent() { - if self.no_ignore { - ignore_dir_results.push(Ok(IgnoreDir::empty(parent))); - } else { - if saw_git { - ignore_names.retain(|&name| name != ".gitignore"); - } else { - saw_git = parent.join(".git").is_dir(); - } - let ignore_dir_result = - IgnoreDir::with_ignore_names(parent, ignore_names.iter()); - ignore_dir_results.push(ignore_dir_result); - } - path = parent; - } - - for ignore_dir_result in ignore_dir_results.into_iter().rev() { - self.parent_stack.push(try!(ignore_dir_result)); - } - Ok(()) - } - - /// Add a directory to the stack. - /// - /// Note that even if this returns an error, the directory is added to the - /// stack (and therefore should be popped). - pub fn push>(&mut self, path: P) -> Result<(), Error> { - if self.no_ignore { - self.stack.push(IgnoreDir::empty(path)); - Ok(()) - } else if self.no_ignore_vcs { - self.push_ignore_dir(IgnoreDir::without_vcs(path)) - } else { - self.push_ignore_dir(IgnoreDir::new(path)) - } - } - - /// Pushes the result of building a directory matcher on to the stack. - /// - /// If the result given contains an error, then it is returned. 
- pub fn push_ignore_dir( - &mut self, - result: Result, - ) -> Result<(), Error> { - match result { - Ok(id) => { - self.stack.push(id); - Ok(()) - } - Err(err) => { - // Don't leave the stack in an inconsistent state. - self.stack.push(IgnoreDir::empty("error")); - Err(err) - } - } - } - - /// Pop a directory from the stack. - /// - /// This panics if the stack is empty. - pub fn pop(&mut self) { - self.stack.pop().expect("non-empty stack"); - } - - /// Returns true if and only if the given file path should be ignored. - pub fn ignored>(&self, path: P, is_dir: bool) -> bool { - let mut path = path.as_ref(); - if let Some(p) = strip_prefix("./", path) { - path = p; - } - let mat = self.overrides.matched(path, is_dir); - if let Some(is_ignored) = self.ignore_match(path, mat) { - return is_ignored; - } - let mut whitelisted = false; - if !self.no_ignore { - for id in self.stack.iter().rev() { - let mat = id.matched(path, is_dir); - if let Some(is_ignored) = self.ignore_match(path, mat) { - if is_ignored { - return true; - } - // If this path is whitelisted by an ignore, then - // fallthrough and let the file type matcher have a say. - whitelisted = true; - break; - } - } - // If the file has been whitelisted, then we have to stop checking - // parent directories. The only thing that can override a whitelist - // at this point is a type filter. - if !whitelisted { - let mut path = path.to_path_buf(); - for id in self.parent_stack.iter().rev() { - if let Some(ref dirname) = id.name { - path = Path::new(dirname).join(path); - } - let mat = id.matched(&*path, is_dir); - if let Some(is_ignored) = self.ignore_match(&*path, mat) { - if is_ignored { - return true; - } - // If this path is whitelisted by an ignore, then - // fallthrough and let the file type matcher have a - // say. 
- whitelisted = true; - break; - } - } - } - } - let mat = self.types.matched(path, is_dir); - if let Some(is_ignored) = self.ignore_match(path, mat) { - if is_ignored { - return true; - } - whitelisted = true; - } - if !whitelisted && self.ignore_hidden && is_hidden(&path) { - debug!("{} ignored because it is hidden", path.display()); - return true; - } - false - } - - /// Returns true if the given match says the given pattern should be - /// ignored or false if the given pattern should be explicitly whitelisted. - /// Returns None otherwise. - pub fn ignore_match>( - &self, - path: P, - mat: Match, - ) -> Option { - let path = path.as_ref(); - match mat { - Match::Whitelist(ref pat) => { - debug!("{} whitelisted by {:?}", path.display(), pat); - Some(false) - } - Match::Ignored(ref pat) => { - debug!("{} ignored by {:?}", path.display(), pat); - Some(true) - } - Match::None => None, - } - } -} - -/// IgnoreDir represents a set of ignore patterns retrieved from a single -/// directory. -#[derive(Debug)] -pub struct IgnoreDir { - /// The path to this directory as given. - path: PathBuf, - /// The directory name, if one exists. - name: Option, - /// A single accumulation of glob patterns for this directory, matched - /// using gitignore semantics. - /// - /// This will include patterns from rgignore as well. The patterns are - /// ordered so that precedence applies automatically (e.g., rgignore - /// patterns procede gitignore patterns). - gi: Option, - // TODO(burntsushi): Matching other types of glob patterns that don't - // conform to gitignore will probably require refactoring this approach. -} - -impl IgnoreDir { - /// Create a new matcher for the given directory. - pub fn new>(path: P) -> Result { - IgnoreDir::with_ignore_names(path, IGNORE_NAMES.iter()) - } - - /// Create a new matcher for the given directory. - /// - /// Don't respect VCS ignore files. 
- pub fn without_vcs>(path: P) -> Result { - let names = IGNORE_NAMES.iter().filter(|name| **name != ".gitignore"); - IgnoreDir::with_ignore_names(path, names) - } - - /// Create a new IgnoreDir that never matches anything with the given path. - pub fn empty>(path: P) -> IgnoreDir { - IgnoreDir { - path: path.as_ref().to_path_buf(), - name: file_name(path.as_ref()).map(|s| s.to_os_string()), - gi: None, - } - } - - /// Create a new matcher for the given directory using only the ignore - /// patterns found in the file names given. - /// - /// If no ignore glob patterns could be found in the directory then `None` - /// is returned. - /// - /// Note that the order of the names given is meaningful. Names appearing - /// later in the list have precedence over names appearing earlier in the - /// list. - pub fn with_ignore_names, S, I>( - path: P, - names: I, - ) -> Result - where P: AsRef, S: AsRef, I: Iterator { - let mut id = IgnoreDir::empty(path); - let mut ok = false; - let mut builder = GitignoreBuilder::new(&id.path); - // The ordering here is important. Later globs have higher precedence. - for name in names { - ok = builder.add_path(id.path.join(name.as_ref())).is_ok() || ok; - } - if !ok { - return Ok(id); - } - id.gi = Some(try!(builder.build())); - Ok(id) - } - - /// Returns true if and only if the given file path should be ignored - /// according to the globs in this directory. `is_dir` should be true if - /// the path refers to a directory and false otherwise. - /// - /// Before matching path, its prefix (as determined by a common suffix - /// of this directory) is stripped. If there is - /// no common suffix/prefix overlap, then path is assumed to reside - /// directly in this directory. - /// - /// If the given path has a `./` prefix then it is stripped before - /// matching. 
- pub fn matched>(&self, path: P, is_dir: bool) -> Match { - self.gi.as_ref() - .map(|gi| gi.matched(path, is_dir)) - .unwrap_or(Match::None) - } -} - -/// Manages a set of overrides provided explicitly by the end user. -struct Overrides { - gi: Option, - unmatched_pat: Pattern, -} - -impl Overrides { - /// Creates a new set of overrides from the gitignore matcher provided. - /// If no matcher is provided, then the resulting overrides have no effect. - fn new(gi: Option) -> Overrides { - Overrides { - gi: gi, - unmatched_pat: Pattern { - from: Path::new("").to_path_buf(), - original: "".to_string(), - pat: "".to_string(), - whitelist: false, - only_dir: false, - }, - } - } - - /// Returns a match for the given path against this set of overrides. - /// - /// If there are no overrides, then this always returns Match::None. - /// - /// If there is at least one positive override, then this never returns - /// Match::None (and interpreting non-matches as ignored) unless is_dir - /// is true. - pub fn matched>(&self, path: P, is_dir: bool) -> Match { - let path = path.as_ref(); - self.gi.as_ref() - .map(|gi| { - let mat = gi.matched_stripped(path, is_dir).invert(); - if mat.is_none() && !is_dir { - if gi.num_ignores() > 0 { - return Match::Ignored(&self.unmatched_pat); - } - } - mat - }) - .unwrap_or(Match::None) - } -} - -#[cfg(test)] -mod tests { - use std::path::Path; - use gitignore::GitignoreBuilder; - use super::IgnoreDir; - - macro_rules! ignored_dir { - ($name:ident, $root:expr, $gi:expr, $xi:expr, $path:expr) => { - #[test] - fn $name() { - let mut builder = GitignoreBuilder::new(&$root); - builder.add_str($gi).unwrap(); - builder.add_str($xi).unwrap(); - let gi = builder.build().unwrap(); - let id = IgnoreDir { - path: Path::new($root).to_path_buf(), - name: Path::new($root).file_name().map(|s| { - s.to_os_string() - }), - gi: Some(gi), - }; - assert!(id.matched($path, false).is_ignored()); - } - }; - } - - macro_rules! 
not_ignored_dir { - ($name:ident, $root:expr, $gi:expr, $xi:expr, $path:expr) => { - #[test] - fn $name() { - let mut builder = GitignoreBuilder::new(&$root); - builder.add_str($gi).unwrap(); - builder.add_str($xi).unwrap(); - let gi = builder.build().unwrap(); - let id = IgnoreDir { - path: Path::new($root).to_path_buf(), - name: Path::new($root).file_name().map(|s| { - s.to_os_string() - }), - gi: Some(gi), - }; - assert!(!id.matched($path, false).is_ignored()); - } - }; - } - - const ROOT: &'static str = "/home/foobar/rust/rg"; - - ignored_dir!(id1, ROOT, "src/main.rs", "", "src/main.rs"); - ignored_dir!(id2, ROOT, "", "src/main.rs", "src/main.rs"); - ignored_dir!(id3, ROOT, "!src/main.rs", "*.rs", "src/main.rs"); - - not_ignored_dir!(idnot1, ROOT, "*.rs", "!src/main.rs", "src/main.rs"); -} diff --git a/src/main.rs b/src/main.rs index 7a6ac0215..e64ddf9c2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,8 @@ extern crate deque; extern crate docopt; extern crate env_logger; -extern crate globset; extern crate grep; +extern crate ignore; #[cfg(windows)] extern crate kernel32; #[macro_use] @@ -16,8 +16,6 @@ extern crate num_cpus; extern crate regex; extern crate rustc_serialize; extern crate term; -extern crate thread_local; -extern crate walkdir; #[cfg(windows)] extern crate winapi; @@ -36,7 +34,7 @@ use deque::{Stealer, Stolen}; use grep::Grep; use memmap::{Mmap, Protection}; use term::Terminal; -use walkdir::DirEntry; +use ignore::DirEntry; use args::Args; use out::{ColoredTerminal, Out}; @@ -61,8 +59,6 @@ macro_rules! 
eprintln { mod args; mod atty; -mod gitignore; -mod ignore; mod out; mod pathutil; mod printer; @@ -70,8 +66,6 @@ mod search_buffer; mod search_stream; #[cfg(windows)] mod terminal_win; -mod types; -mod walk; pub type Result = result::Result>; @@ -101,7 +95,6 @@ fn run(args: Args) -> Result { if threads == 1 || isone { return run_one_thread(args.clone()); } - let out = Arc::new(Mutex::new(args.out())); let quiet_matched = QuietMatched::new(args.quiet()); let mut workers = vec![]; @@ -126,21 +119,15 @@ fn run(args: Args) -> Result { workq }; let mut paths_searched: u64 = 0; - for p in paths { + for dent in args.walker() { if quiet_matched.has_match() { break; } - if p == Path::new("-") { - paths_searched += 1; + paths_searched += 1; + if dent.is_stdin() { workq.push(Work::Stdin); } else { - for ent in try!(args.walker(p)) { - if quiet_matched.has_match() { - break; - } - paths_searched += 1; - workq.push(Work::File(ent)); - } + workq.push(Work::File(dent)); } } if !paths.is_empty() && paths_searched == 0 { @@ -165,47 +152,33 @@ fn run_one_thread(args: Arc) -> Result { grep: args.grep(), match_count: 0, }; - let paths = args.paths(); let mut term = args.stdout(); - let mut paths_searched: u64 = 0; - for p in paths { - if args.quiet() && worker.match_count > 0 { - break; - } - if p == Path::new("-") { - paths_searched += 1; - let mut printer = args.printer(&mut term); - if worker.match_count > 0 { - if let Some(sep) = args.file_separator() { - printer = printer.file_separator(sep); - } + for dent in args.walker() { + let mut printer = args.printer(&mut term); + if worker.match_count > 0 { + if args.quiet() { + break; + } + if let Some(sep) = args.file_separator() { + printer = printer.file_separator(sep); } + } + paths_searched += 1; + if dent.is_stdin() { worker.do_work(&mut printer, WorkReady::Stdin); } else { - for ent in try!(args.walker(p)) { - paths_searched += 1; - let mut printer = args.printer(&mut term); - if worker.match_count > 0 { - if args.quiet() { - 
break; - } - if let Some(sep) = args.file_separator() { - printer = printer.file_separator(sep); - } + let file = match File::open(dent.path()) { + Ok(file) => file, + Err(err) => { + eprintln!("{}: {}", dent.path().display(), err); + continue; } - let file = match File::open(ent.path()) { - Ok(file) => file, - Err(err) => { - eprintln!("{}: {}", ent.path().display(), err); - continue; - } - }; - worker.do_work(&mut printer, WorkReady::DirFile(ent, file)); - } + }; + worker.do_work(&mut printer, WorkReady::DirFile(dent, file)); } } - if !paths.is_empty() && paths_searched == 0 { + if !args.paths().is_empty() && paths_searched == 0 { eprintln!("No files were searched, which means ripgrep probably \ applied a filter you didn't expect. \ Try running again with --debug."); @@ -217,16 +190,9 @@ fn run_files(args: Arc) -> Result { let term = args.stdout(); let mut printer = args.printer(term); let mut file_count = 0; - for p in args.paths() { - if p == Path::new("-") { - printer.path(&Path::new("")); - file_count += 1; - } else { - for ent in try!(args.walker(p)) { - printer.path(ent.path()); - file_count += 1; - } - } + for dent in args.walker() { + printer.path(dent.path()); + file_count += 1; } Ok(file_count) } diff --git a/src/pathutil.rs b/src/pathutil.rs index 073010bea..085c9dbc0 100644 --- a/src/pathutil.rs +++ b/src/pathutil.rs @@ -8,7 +8,6 @@ with the raw bytes directly. On large repositories (like chromium), this can have a ~25% performance improvement on just listing the files to search (!). */ -use std::ffi::OsStr; use std::path::Path; /// Strip `prefix` from the `path` and return the remainder. 
@@ -19,6 +18,7 @@ pub fn strip_prefix<'a, P: AsRef + ?Sized>( prefix: &'a P, path: &'a Path, ) -> Option<&'a Path> { + use std::ffi::OsStr; use std::os::unix::ffi::OsStrExt; let prefix = prefix.as_ref().as_os_str().as_bytes(); @@ -40,79 +40,3 @@ pub fn strip_prefix<'a, P: AsRef + ?Sized>( ) -> Option<&'a Path> { path.strip_prefix(prefix).ok() } - -/// The final component of the path, if it is a normal file. -/// -/// If the path terminates in ., .., or consists solely of a root of prefix, -/// file_name will return None. -#[cfg(unix)] -pub fn file_name<'a, P: AsRef + ?Sized>( - path: &'a P, -) -> Option<&'a OsStr> { - use std::os::unix::ffi::OsStrExt; - use memchr::memrchr; - - let path = path.as_ref().as_os_str().as_bytes(); - if path.is_empty() { - return None; - } else if path.len() == 1 && path[0] == b'.' { - return None; - } else if path.last() == Some(&b'.') { - return None; - } else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] { - return None; - } - let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0); - Some(OsStr::from_bytes(&path[last_slash..])) -} - -/// The final component of the path, if it is a normal file. -/// -/// If the path terminates in ., .., or consists solely of a root of prefix, -/// file_name will return None. -#[cfg(not(unix))] -pub fn file_name<'a, P: AsRef + ?Sized>( - path: &'a P, -) -> Option<&'a OsStr> { - path.as_ref().file_name() -} - -/// Returns true if and only if this file path is considered to be hidden. -#[cfg(unix)] -pub fn is_hidden>(path: P) -> bool { - use std::os::unix::ffi::OsStrExt; - - if let Some(name) = file_name(path.as_ref()) { - name.as_bytes().get(0) == Some(&b'.') - } else { - false - } -} - -/// Returns true if and only if this file path is considered to be hidden. 
-#[cfg(not(unix))] -pub fn is_hidden>(path: P) -> bool { - if let Some(name) = file_name(path.as_ref()) { - name.to_str().map(|s| s.starts_with(".")).unwrap_or(false) - } else { - false - } -} - -/// Returns true if this file path is just a file name. i.e., Its parent is -/// the empty string. -#[cfg(unix)] -pub fn is_file_name>(path: P) -> bool { - use std::os::unix::ffi::OsStrExt; - use memchr::memchr; - - let path = path.as_ref().as_os_str().as_bytes(); - memchr(b'/', path).is_none() -} - -/// Returns true if this file path is just a file name. i.e., Its parent is -/// the empty string. -#[cfg(not(unix))] -pub fn is_file_name>(path: P) -> bool { - path.as_ref().parent().map(|p| p.as_os_str().is_empty()).unwrap_or(false) -} diff --git a/src/printer.rs b/src/printer.rs index 9a5c649d9..e7373bce6 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -5,7 +5,7 @@ use term::{Attr, Terminal}; use term::color; use pathutil::strip_prefix; -use types::FileTypeDef; +use ignore::types::FileTypeDef; /// Printer encapsulates all output logic for searching. /// @@ -168,11 +168,11 @@ impl Printer { self.write(def.name().as_bytes()); self.write(b": "); let mut first = true; - for pat in def.patterns() { + for glob in def.globs() { if !first { self.write(b", "); } - self.write(pat.as_bytes()); + self.write(glob.as_bytes()); first = false; } self.write_eol(); diff --git a/src/terminal.rs b/src/terminal.rs deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/types.rs b/src/types.rs deleted file mode 100644 index 7c33a48c1..000000000 --- a/src/types.rs +++ /dev/null @@ -1,458 +0,0 @@ -/*! -The types module provides a way of associating glob patterns on file names to -file types. 
-*/ - -use std::collections::HashMap; -use std::error::Error as StdError; -use std::fmt; -use std::path::Path; - -use regex; - -use gitignore::{Match, Pattern}; -use globset::{self, GlobBuilder, GlobSet, GlobSetBuilder}; - -const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[ - ("asm", &["*.asm", "*.s", "*.S"]), - ("awk", &["*.awk"]), - ("c", &["*.c", "*.h", "*.H"]), - ("cbor", &["*.cbor"]), - ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]), - ("cmake", &["*.cmake", "CMakeLists.txt"]), - ("coffeescript", &["*.coffee"]), - ("config", &["*.config"]), - ("cpp", &[ - "*.C", "*.cc", "*.cpp", "*.cxx", - "*.h", "*.H", "*.hh", "*.hpp", - ]), - ("csharp", &["*.cs"]), - ("css", &["*.css"]), - ("cython", &["*.pyx"]), - ("dart", &["*.dart"]), - ("d", &["*.d"]), - ("elisp", &["*.el"]), - ("erlang", &["*.erl", "*.hrl"]), - ("fortran", &[ - "*.f", "*.F", "*.f77", "*.F77", "*.pfo", - "*.f90", "*.F90", "*.f95", "*.F95", - ]), - ("fsharp", &["*.fs", "*.fsx", "*.fsi"]), - ("go", &["*.go"]), - ("groovy", &["*.groovy", "*.gradle"]), - ("hbs", &["*.hbs"]), - ("haskell", &["*.hs", "*.lhs"]), - ("html", &["*.htm", "*.html"]), - ("java", &["*.java"]), - ("jinja", &["*.jinja", "*.jinja2"]), - ("js", &[ - "*.js", "*.jsx", "*.vue", - ]), - ("json", &["*.json"]), - ("jsonl", &["*.jsonl"]), - ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]), - ("lua", &["*.lua"]), - ("m4", &["*.ac", "*.m4"]), - ("make", &["gnumakefile", "Gnumakefile", "makefile", "Makefile", "*.mk"]), - ("markdown", &["*.md"]), - ("md", &["*.md"]), - ("matlab", &["*.m"]), - ("mk", &["mkfile"]), - ("ml", &["*.ml"]), - ("nim", &["*.nim"]), - ("objc", &["*.h", "*.m"]), - ("objcpp", &["*.h", "*.mm"]), - ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]), - ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]), - ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]), - ("py", &["*.py", "*.pyx"]), - ("readme", &["README*", "*README"]), - ("r", &["*.R", "*.r", "*.Rmd", 
"*.Rnw"]), - ("rst", &["*.rst"]), - ("ruby", &["*.rb"]), - ("rust", &["*.rs"]), - ("scala", &["*.scala"]), - ("sh", &["*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh"]), - ("spark", &["*.spark"]), - ("sql", &["*.sql"]), - ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]), - ("swift", &["*.swift"]), - ("tcl", &["*.tcl"]), - ("tex", &["*.tex", "*.cls", "*.sty"]), - ("ts", &["*.ts", "*.tsx"]), - ("txt", &["*.txt"]), - ("toml", &["*.toml", "Cargo.lock"]), - ("vala", &["*.vala"]), - ("vb", &["*.vb"]), - ("vimscript", &["*.vim"]), - ("xml", &["*.xml"]), - ("yacc", &["*.y"]), - ("yaml", &["*.yaml", "*.yml"]), - ("zsh", &["*.zsh", ".zshenv", ".zlogin", ".zprofile", ".zshrc"]), -]; - -/// Describes all the possible failure conditions for building a file type -/// matcher. -#[derive(Debug)] -pub enum Error { - /// We tried to select (or negate) a file type that is not defined. - UnrecognizedFileType(String), - /// A user specified file type definition could not be parsed. - InvalidDefinition, - /// There was an error building the matcher (probably a bad glob). - Glob(globset::Error), - /// There was an error compiling a glob as a regex. 
- Regex(regex::Error), -} - -impl StdError for Error { - fn description(&self) -> &str { - match *self { - Error::UnrecognizedFileType(_) => "unrecognized file type", - Error::InvalidDefinition => "invalid definition", - Error::Glob(ref err) => err.description(), - Error::Regex(ref err) => err.description(), - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::UnrecognizedFileType(ref ty) => { - write!(f, "unrecognized file type: {}", ty) - } - Error::InvalidDefinition => { - write!(f, "invalid definition (format is type:glob, e.g., \ - html:*.html)") - } - Error::Glob(ref err) => err.fmt(f), - Error::Regex(ref err) => err.fmt(f), - } - } -} - -impl From for Error { - fn from(err: globset::Error) -> Error { - Error::Glob(err) - } -} - -impl From for Error { - fn from(err: regex::Error) -> Error { - Error::Regex(err) - } -} - -/// A single file type definition. -#[derive(Clone, Debug)] -pub struct FileTypeDef { - name: String, - pats: Vec, -} - -impl FileTypeDef { - /// Return the name of this file type. - pub fn name(&self) -> &str { - &self.name - } - - /// Return the glob patterns used to recognize this file type. - pub fn patterns(&self) -> &[String] { - &self.pats - } -} - -/// Types is a file type matcher. -#[derive(Clone, Debug)] -pub struct Types { - defs: Vec, - selected: Option, - negated: Option, - has_selected: bool, - unmatched_pat: Pattern, -} - -impl Types { - /// Creates a new file type matcher from the given Gitignore matcher. If - /// not Gitignore matcher is provided, then the file type matcher has no - /// effect. - /// - /// If has_selected is true, then at least one file type was selected. - /// Therefore, any non-matches should be ignored. 
- fn new( - selected: Option, - negated: Option, - has_selected: bool, - defs: Vec, - ) -> Types { - Types { - defs: defs, - selected: selected, - negated: negated, - has_selected: has_selected, - unmatched_pat: Pattern { - from: Path::new("").to_path_buf(), - original: "".to_string(), - pat: "".to_string(), - whitelist: false, - only_dir: false, - }, - } - } - - /// Creates a new file type matcher that never matches. - pub fn empty() -> Types { - Types::new(None, None, false, vec![]) - } - - /// Returns a match for the given path against this file type matcher. - /// - /// The path is considered whitelisted if it matches a selected file type. - /// The path is considered ignored if it matched a negated file type. - /// If at least one file type is selected and path doesn't match, then - /// the path is also considered ignored. - pub fn matched>(&self, path: P, is_dir: bool) -> Match { - // If we don't have any matcher, then we can't do anything. - if self.negated.is_none() && self.selected.is_none() { - return Match::None; - } - // File types don't apply to directories. - if is_dir { - return Match::None; - } - let path = path.as_ref(); - let name = match path.file_name() { - Some(name) => name.to_string_lossy(), - None if self.has_selected => { - return Match::Ignored(&self.unmatched_pat); - } - None => { - return Match::None; - } - }; - if self.negated.as_ref().map(|s| s.is_match(&*name)).unwrap_or(false) { - return Match::Ignored(&self.unmatched_pat); - } - if self.selected.as_ref().map(|s|s.is_match(&*name)).unwrap_or(false) { - return Match::Whitelist(&self.unmatched_pat); - } - if self.has_selected { - Match::Ignored(&self.unmatched_pat) - } else { - Match::None - } - } - - /// Return the set of current file type definitions. - pub fn definitions(&self) -> &[FileTypeDef] { - &self.defs - } -} - -/// TypesBuilder builds a type matcher from a set of file type definitions and -/// a set of file type selections. 
-pub struct TypesBuilder { - types: HashMap>, - selected: Vec, - negated: Vec, -} - -impl TypesBuilder { - /// Create a new builder for a file type matcher. - pub fn new() -> TypesBuilder { - TypesBuilder { - types: HashMap::new(), - selected: vec![], - negated: vec![], - } - } - - /// Build the current set of file type definitions *and* selections into - /// a file type matcher. - pub fn build(&self) -> Result { - let selected_globs = - if self.selected.is_empty() { - None - } else { - let mut bset = GlobSetBuilder::new(); - for name in &self.selected { - let globs = match self.types.get(name) { - Some(globs) => globs, - None => { - let msg = name.to_string(); - return Err(Error::UnrecognizedFileType(msg)); - } - }; - for glob in globs { - let pat = try!( - GlobBuilder::new(glob) - .literal_separator(true).build()); - bset.add(pat); - } - } - Some(try!(bset.build())) - }; - let negated_globs = - if self.negated.is_empty() { - None - } else { - let mut bset = GlobSetBuilder::new(); - for name in &self.negated { - let globs = match self.types.get(name) { - Some(globs) => globs, - None => { - let msg = name.to_string(); - return Err(Error::UnrecognizedFileType(msg)); - } - }; - for glob in globs { - let pat = try!( - GlobBuilder::new(glob) - .literal_separator(true).build()); - bset.add(pat); - } - } - Some(try!(bset.build())) - }; - Ok(Types::new( - selected_globs, - negated_globs, - !self.selected.is_empty(), - self.definitions(), - )) - } - - /// Return the set of current file type definitions. - pub fn definitions(&self) -> Vec { - let mut defs = vec![]; - for (ref name, ref pats) in &self.types { - let mut pats = pats.to_vec(); - pats.sort(); - defs.push(FileTypeDef { - name: name.to_string(), - pats: pats, - }); - } - defs.sort_by(|def1, def2| def1.name().cmp(def2.name())); - defs - } - - /// Select the file type given by `name`. - /// - /// If `name` is `all`, then all file types are selected. 
- pub fn select(&mut self, name: &str) -> &mut TypesBuilder { - if name == "all" { - for name in self.types.keys() { - self.selected.push(name.to_string()); - } - } else { - self.selected.push(name.to_string()); - } - self - } - - /// Ignore the file type given by `name`. - /// - /// If `name` is `all`, then all file types are negated. - pub fn negate(&mut self, name: &str) -> &mut TypesBuilder { - if name == "all" { - for name in self.types.keys() { - self.negated.push(name.to_string()); - } - } else { - self.negated.push(name.to_string()); - } - self - } - - /// Clear any file type definitions for the type given. - pub fn clear(&mut self, name: &str) -> &mut TypesBuilder { - self.types.remove(name); - self - } - - /// Add a new file type definition. `name` can be arbitrary and `pat` - /// should be a glob recognizing file paths belonging to the `name` type. - pub fn add(&mut self, name: &str, pat: &str) -> &mut TypesBuilder { - self.types.entry(name.to_string()) - .or_insert(vec![]).push(pat.to_string()); - self - } - - /// Add a new file type definition specified in string form. The format - /// is `name:glob`. Names may not include a colon. - pub fn add_def(&mut self, def: &str) -> Result<(), Error> { - let name: String = def.chars().take_while(|&c| c != ':').collect(); - let pat: String = def.chars().skip(name.chars().count() + 1).collect(); - if name.is_empty() || pat.is_empty() { - return Err(Error::InvalidDefinition); - } - self.add(&name, &pat); - Ok(()) - } - - /// Add a set of default file type definitions. - pub fn add_defaults(&mut self) -> &mut TypesBuilder { - for &(name, exts) in TYPE_EXTENSIONS { - for ext in exts { - self.add(name, ext); - } - } - self - } -} - -#[cfg(test)] -mod tests { - use super::TypesBuilder; - - macro_rules! 
matched { - ($name:ident, $types:expr, $sel:expr, $selnot:expr, - $path:expr) => { - matched!($name, $types, $sel, $selnot, $path, true); - }; - (not, $name:ident, $types:expr, $sel:expr, $selnot:expr, - $path:expr) => { - matched!($name, $types, $sel, $selnot, $path, false); - }; - ($name:ident, $types:expr, $sel:expr, $selnot:expr, - $path:expr, $matched:expr) => { - #[test] - fn $name() { - let mut btypes = TypesBuilder::new(); - for tydef in $types { - btypes.add_def(tydef).unwrap(); - } - for sel in $sel { - btypes.select(sel); - } - for selnot in $selnot { - btypes.negate(selnot); - } - let types = btypes.build().unwrap(); - let mat = types.matched($path, false); - assert_eq!($matched, !mat.is_ignored()); - } - }; - } - - fn types() -> Vec<&'static str> { - vec![ - "html:*.html", - "html:*.htm", - "rust:*.rs", - "js:*.js", - ] - } - - matched!(match1, types(), vec!["rust"], vec![], "lib.rs"); - matched!(match2, types(), vec!["html"], vec![], "index.html"); - matched!(match3, types(), vec!["html"], vec![], "index.htm"); - matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs"); - matched!(match5, types(), vec![], vec![], "index.html"); - matched!(match6, types(), vec![], vec!["rust"], "index.html"); - - matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html"); - matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs"); -} diff --git a/src/walk.rs b/src/walk.rs deleted file mode 100644 index f661c4cfa..000000000 --- a/src/walk.rs +++ /dev/null @@ -1,140 +0,0 @@ -/*! -The walk module implements a recursive directory iterator (using the `walkdir`) -crate that can efficiently skip and ignore files and directories specified in -a user's ignore patterns. -*/ - -use walkdir::{self, DirEntry, WalkDir, WalkDirIterator}; - -use ignore::Ignore; - -/// Iter is a recursive directory iterator over file paths in a directory. -/// Only file paths should be searched are yielded. 
-pub struct Iter { - ig: Ignore, - it: WalkEventIter, -} - -impl Iter { - /// Create a new recursive directory iterator using the ignore patterns - /// and walkdir iterator given. - pub fn new(ig: Ignore, wd: WalkDir) -> Iter { - Iter { - ig: ig, - it: WalkEventIter::from(wd), - } - } - - /// Returns true if this entry should be skipped. - #[inline(always)] - fn skip_entry(&self, ent: &DirEntry) -> bool { - if ent.depth() == 0 { - // Never skip the root directory. - return false; - } - if self.ig.ignored(ent.path(), ent.file_type().is_dir()) { - return true; - } - false - } -} - -impl Iterator for Iter { - type Item = DirEntry; - - #[inline(always)] - fn next(&mut self) -> Option { - while let Some(ev) = self.it.next() { - match ev { - Err(err) => { - eprintln!("{}", err); - } - Ok(WalkEvent::Exit) => { - self.ig.pop(); - } - Ok(WalkEvent::Dir(ent)) => { - if self.skip_entry(&ent) { - self.it.it.skip_current_dir(); - // Still need to push this on the stack because we'll - // get a WalkEvent::Exit event for this dir. We don't - // care if it errors though. - let _ = self.ig.push(ent.path()); - continue; - } - if let Err(err) = self.ig.push(ent.path()) { - eprintln!("{}", err); - self.it.it.skip_current_dir(); - continue; - } - } - Ok(WalkEvent::File(ent)) => { - if self.skip_entry(&ent) { - continue; - } - // If this isn't actually a file (e.g., a symlink), then - // skip it. - if !ent.file_type().is_file() { - continue; - } - return Some(ent); - } - } - } - None - } -} - -/// WalkEventIter transforms a WalkDir iterator into an iterator that more -/// accurately describes the directory tree. Namely, it emits events that are -/// one of three types: directory, file or "exit." An "exit" event means that -/// the entire contents of a directory have been enumerated. 
-struct WalkEventIter { - depth: usize, - it: walkdir::Iter, - next: Option>, -} - -#[derive(Debug)] -enum WalkEvent { - Dir(DirEntry), - File(DirEntry), - Exit, -} - -impl From for WalkEventIter { - fn from(it: WalkDir) -> WalkEventIter { - WalkEventIter { depth: 0, it: it.into_iter(), next: None } - } -} - -impl Iterator for WalkEventIter { - type Item = walkdir::Result; - - #[inline(always)] - fn next(&mut self) -> Option> { - let dent = self.next.take().or_else(|| self.it.next()); - let depth = match dent { - None => 0, - Some(Ok(ref dent)) => dent.depth(), - Some(Err(ref err)) => err.depth(), - }; - if depth < self.depth { - self.depth -= 1; - self.next = dent; - return Some(Ok(WalkEvent::Exit)); - } - self.depth = depth; - match dent { - None => None, - Some(Err(err)) => Some(Err(err)), - Some(Ok(dent)) => { - if dent.file_type().is_dir() { - self.depth += 1; - Some(Ok(WalkEvent::Dir(dent))) - } else { - Some(Ok(WalkEvent::File(dent))) - } - } - } - } -} diff --git a/tests/tests.rs b/tests/tests.rs index a559045c1..795c09963 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -54,6 +54,20 @@ fn path(unix: &str) -> String { } } +fn paths(unix: &[&str]) -> Vec { + let mut xs: Vec<_> = unix.iter().map(|s| path(s)).collect(); + xs.sort(); + xs +} + +fn paths_from_stdout(stdout: String) -> Vec { + let mut paths: Vec<_> = stdout.lines().map(|s| { + s.split(":").next().unwrap().to_string() + }).collect(); + paths.sort(); + paths +} + fn sort_lines(lines: &str) -> String { let mut lines: Vec = lines.trim().lines().map(|s| s.to_owned()).collect(); @@ -864,6 +878,74 @@ be, to a very large extent, the result of luck. 
Sherlock Holmes assert_eq!(lines, expected); }); +// See: https://github.com/BurntSushi/ripgrep/issues/45 +sherlock!(feature_45_relative_cwd, "test", ".", +|wd: WorkDir, mut cmd: Command| { + wd.create(".not-an-ignore", "foo\n/bar"); + wd.create_dir("bar"); + wd.create_dir("baz/bar"); + wd.create_dir("baz/baz/bar"); + wd.create("bar/test", "test"); + wd.create("baz/bar/test", "test"); + wd.create("baz/baz/bar/test", "test"); + wd.create("baz/foo", "test"); + wd.create("baz/test", "test"); + wd.create("foo", "test"); + wd.create("test", "test"); + + // First, get a baseline without applying ignore rules. + let lines = paths_from_stdout(wd.stdout(&mut cmd)); + assert_eq!(lines, paths(&[ + "bar/test", "baz/bar/test", "baz/baz/bar/test", "baz/foo", + "baz/test", "foo", "test", + ])); + + // Now try again with the ignore file activated. + cmd.arg("--ignore-file").arg(".not-an-ignore"); + let lines = paths_from_stdout(wd.stdout(&mut cmd)); + assert_eq!(lines, paths(&[ + "baz/bar/test", "baz/baz/bar/test", "baz/test", "test", + ])); + + // Now do it again, but inside the baz directory. + // Since the ignore file is interpreted relative to the CWD, this will + // cause the /bar anchored pattern to filter out baz/bar, which is a + // subtle difference between true parent ignore files and manually + // specified ignore files. 
+ let mut cmd = wd.command(); + cmd.arg("test").arg(".").arg("--ignore-file").arg("../.not-an-ignore"); + cmd.current_dir(wd.path().join("baz")); + let lines = paths_from_stdout(wd.stdout(&mut cmd)); + assert_eq!(lines, paths(&["baz/bar/test", "test"])); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/45 +sherlock!(feature_45_precedence_with_others, "test", ".", +|wd: WorkDir, mut cmd: Command| { + wd.create(".not-an-ignore", "*.log"); + wd.create(".ignore", "!imp.log"); + wd.create("imp.log", "test"); + wd.create("wat.log", "test"); + + cmd.arg("--ignore-file").arg(".not-an-ignore"); + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines, "imp.log:test\n"); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/45 +sherlock!(feature_45_precedence_internal, "test", ".", +|wd: WorkDir, mut cmd: Command| { + wd.create(".not-an-ignore1", "*.log"); + wd.create(".not-an-ignore2", "!imp.log"); + wd.create("imp.log", "test"); + wd.create("wat.log", "test"); + + cmd.arg("--ignore-file").arg(".not-an-ignore1"); + cmd.arg("--ignore-file").arg(".not-an-ignore2"); + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines, "imp.log:test\n"); +}); + // See: https://github.com/BurntSushi/ripgrep/issues/68 clean!(feature_68_no_ignore_vcs, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create(".gitignore", "foo");