From dd73447329e637ee207b1103ecb6a4bdbdc89324 Mon Sep 17 00:00:00 2001 From: David Cantrell Date: Sun, 31 Jul 2022 15:29:48 +0100 Subject: [PATCH] feat: Add the ability to have some file extensions *prevent* a module from triggering (#4043) * test that we can match a multi-part file extension such as in foo.tar.gz * now we can match multi-part file extensions like on foo.tar.gz * add a test that a !ext is a negative match and over-rides any positive match * test that negative extensions that don't match any file have no effect * fail the match if any negative extensions exist * cargo fmt I'm not happy with this, in particular it's made the structures of has_any_positive_extension and has_no_negative_extension look different, and the logic in is_match is harder to follow * placate clippy * documentation for multi-part extensions and negative extensions * get rid of an unnecessary .to_string() and comment the necessary but weird-looking invocations of .to_string_lossy().to_string() * tests for negative matching of files and folders * fail the match if any negative files/folders match * document file/folder negative matching; be less prolix * suppress Nodejs if Deno files are present (#2627) * Revert "suppress Nodejs if Deno files are present (#2627)" This reverts commit c1394fd7b37bb0bf06b1449e074020a2e16bfa04. This was a terrible way of doing this, there's got to be a better way! --- docs/config/README.md | 18 ++++++ src/context.rs | 117 ++++++++++++++++++++++++++++++++++----- src/modules/directory.rs | 2 + 3 files changed, 124 insertions(+), 13 deletions(-) diff --git a/docs/config/README.md b/docs/config/README.md index 5e74b790..762262db 100644 --- a/docs/config/README.md +++ b/docs/config/README.md @@ -152,6 +152,24 @@ format = ''' \$''' ``` +### Negative matching + +Many modules have `detect_extensions`, `detect_files`, and `detect_folders` variables. These take +lists of strings to match or not match. 
"Negative" options, those which should not be matched, are +indicated with a leading "!" character. The presence of _any_ negative indicator in the directory +will result in the module not being matched. + +Extensions are matched against both the characters after the last dot in a filename, and the +characters after the first dot in a filename. For example, `foo.bar.tar.gz` will be matched +against `bar.tar.gz` and `gz` in the `detect_extensions` variable. Files whose name begins with a +dot are not considered to have extensions at all. + +To see how this works in practice, you could match TypeScript but not MPEG Transport Stream files thus: + +```toml +detect_extensions = ["ts", "!video.ts", "!audio.ts"] +``` + ## Prompt This is the list of prompt-wide configuration options. diff --git a/src/context.rs b/src/context.rs index 743c242f..404cf5aa 100644 --- a/src/context.rs +++ b/src/context.rs @@ -397,10 +397,27 @@ impl DirContents { folders.insert(path); } else { if !path.to_string_lossy().starts_with('.') { + // Extract the file extensions (yes, that's plural) from a filename. + // Why plural? Consider the case of foo.tar.gz. It's a compressed + // tarball (tar.gz), and it's a gzipped file (gz). We should be able + // to match both. + + // find the minimal extension on a file. ie, the gz in foo.tar.gz + // NB the .to_string_lossy().to_string() here looks weird but is + // required to convert it from a Cow. path.extension() .map(|ext| extensions.insert(ext.to_string_lossy().to_string())); + + // find the full extension on a file. 
ie, the tar.gz in foo.tar.gz + path.file_name().map(|file_name| { + file_name + .to_string_lossy() + .split_once('.') + .map(|(_, after)| extensions.insert(after.to_string())) + }); } if let Some(file_name) = path.file_name() { + // this .to_string_lossy().to_string() is also required file_names.insert(file_name.to_string_lossy().to_string()); } files.insert(path); @@ -432,24 +449,47 @@ impl DirContents { self.file_names.contains(name) } - pub fn has_any_file_name(&self, names: &[&str]) -> bool { - names.iter().any(|name| self.has_file_name(name)) - } - pub fn has_folder(&self, path: &str) -> bool { self.folders.contains(Path::new(path)) } - pub fn has_any_folder(&self, paths: &[&str]) -> bool { - paths.iter().any(|path| self.has_folder(path)) - } - pub fn has_extension(&self, ext: &str) -> bool { self.extensions.contains(ext) } - pub fn has_any_extension(&self, exts: &[&str]) -> bool { - exts.iter().any(|ext| self.has_extension(ext)) + pub fn has_any_positive_file_name(&self, names: &[&str]) -> bool { + names + .iter() + .any(|name| !name.starts_with('!') && self.has_file_name(name)) + } + + pub fn has_any_positive_folder(&self, paths: &[&str]) -> bool { + paths + .iter() + .any(|path| !path.starts_with('!') && self.has_folder(path)) + } + + pub fn has_any_positive_extension(&self, exts: &[&str]) -> bool { + exts.iter() + .any(|ext| !ext.starts_with('!') && self.has_extension(ext)) + } + + pub fn has_no_negative_file_name(&self, names: &[&str]) -> bool { + !names + .iter() + .any(|name| name.starts_with('!') && self.has_file_name(&name[1..])) + } + + pub fn has_no_negative_folder(&self, paths: &[&str]) -> bool { + !paths + .iter() + .any(|path| path.starts_with('!') && self.has_folder(&path[1..])) + } + + pub fn has_no_negative_extension(&self, exts: &[&str]) -> bool { + !exts + .iter() + .any(|ext| ext.starts_with('!') && self.has_extension(&ext[1..])) } } @@ -516,9 +556,16 @@ impl<'a> ScanDir<'a> { /// based on the current `PathBuf` check to see /// if any of 
this criteria match or exist and returning a boolean pub fn is_match(&self) -> bool { - self.dir_contents.has_any_extension(self.extensions) - || self.dir_contents.has_any_folder(self.folders) - || self.dir_contents.has_any_file_name(self.files) + // if there exists a file with a file/folder/ext we've said we don't want, + // fail the match straight away + self.dir_contents.has_no_negative_extension(self.extensions) + && self.dir_contents.has_no_negative_file_name(self.files) + && self.dir_contents.has_no_negative_folder(self.folders) + && (self + .dir_contents + .has_any_positive_extension(self.extensions) + || self.dir_contents.has_any_positive_file_name(self.files) + || self.dir_contents.has_any_positive_folder(self.folders)) } } @@ -726,6 +773,50 @@ mod tests { .is_match()); node.close()?; + let tarballs = testdir(&["foo.tgz", "foo.tar.gz"])?; + let tarballs_dc = DirContents::from_path(tarballs.path())?; + assert!(ScanDir { + dir_contents: &tarballs_dc, + files: &[], + extensions: &["tar.gz"], + folders: &[], + } + .is_match()); + tarballs.close()?; + + let dont_match_ext = testdir(&["foo.js", "foo.ts"])?; + let dont_match_ext_dc = DirContents::from_path(dont_match_ext.path())?; + assert!(!ScanDir { + dir_contents: &dont_match_ext_dc, + files: &[], + extensions: &["js", "!notfound", "!ts"], + folders: &[], + } + .is_match()); + dont_match_ext.close()?; + + let dont_match_file = testdir(&["goodfile", "evilfile"])?; + let dont_match_file_dc = DirContents::from_path(dont_match_file.path())?; + assert!(!ScanDir { + dir_contents: &dont_match_file_dc, + files: &["goodfile", "!notfound", "!evilfile"], + extensions: &[], + folders: &[], + } + .is_match()); + dont_match_file.close()?; + + let dont_match_folder = testdir(&["gooddir/somefile", "evildir/somefile"])?; + let dont_match_folder_dc = DirContents::from_path(dont_match_folder.path())?; + assert!(!ScanDir { + dir_contents: &dont_match_folder_dc, + files: &[], + extensions: &[], + folders: &["gooddir", "!notfound", 
"!evildir"], + } + .is_match()); + dont_match_folder.close()?; + Ok(()) } diff --git a/src/modules/directory.rs b/src/modules/directory.rs index 109a9176..29dc94bd 100644 --- a/src/modules/directory.rs +++ b/src/modules/directory.rs @@ -498,6 +498,8 @@ mod tests { fn make_known_tempdir(root: &Path) -> io::Result<(TempDir, String)> { fs::create_dir_all(root)?; let dir = TempDir::new_in(root)?; + // the .to_string_lossy().to_string() here looks weird but is required + // to convert it from a Cow. let path = dir .path() .file_name()