feat: Add the ability to have some file extensions *prevent* a module from triggering (#4043)

* test that we can match a multi-part file extension such as in foo.tar.gz

* now we can match multi-part file extensions like on foo.tar.gz

* add a test that a !ext is a negative match and over-rides any positive match

* test that negative extensions that don't match any file have no effect

* fail the match if any negative extensions exist

* cargo fmt

I'm not happy with this; in particular, it has made the structures of has_any_positive_extension and has_no_negative_extension look different, and the logic in is_match is harder to follow

* placate clippy

* documentation for multi-part extensions and negative extensions

* get rid of an unnecessary .to_string() and comment the necessary but weird-looking invocations of .to_string_lossy().to_string()

* tests for negative matching of files and folders

* fail the match if any negative files/folders match

* document file/folder negative matching; be less prolix

* suppress Nodejs if Deno files are present (#2627)

* Revert "suppress Nodejs if Deno files are present (#2627)"

This reverts commit c1394fd7b37bb0bf06b1449e074020a2e16bfa04.

This was a terrible way of doing this, there's got to be a better way!
This commit is contained in:
David Cantrell 2022-07-31 15:29:48 +01:00 committed by GitHub
parent b75677ab59
commit dd73447329
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 124 additions and 13 deletions

View File

@ -152,6 +152,24 @@ format = '''
\$''' \$'''
``` ```
### Negative matching
Many modules have `detect_extensions`, `detect_files`, and `detect_folders` variables. These take
lists of strings to match or not match. "Negative" options, those which should not be matched, are
indicated with a leading "!" character. The presence of _any_ negative indicator in the directory
will result in the module not being matched.
Extensions are matched against both the characters after the last dot in a filename, and the
characters after the first dot in a filename. For example, `foo.bar.tar.gz` will be matched
against `bar.tar.gz` and `gz` in the `detect_extensions` variable. Files whose name begins with a
dot are not considered to have extensions at all.
To see how this works in practice, you could match TypeScript but not MPEG Transport Stream files thus:
```toml
detect_extensions = ["ts", "!video.ts", "!audio.ts"]
```
## Prompt ## Prompt
This is the list of prompt-wide configuration options. This is the list of prompt-wide configuration options.

View File

@ -397,10 +397,27 @@ impl DirContents {
folders.insert(path); folders.insert(path);
} else { } else {
if !path.to_string_lossy().starts_with('.') { if !path.to_string_lossy().starts_with('.') {
// Extract the file extensions (yes, that's plural) from a filename.
// Why plural? Consider the case of foo.tar.gz. It's a compressed
// tarball (tar.gz), and it's a gzipped file (gz). We should be able
// to match both.
// find the minimal extension on a file. ie, the gz in foo.tar.gz
// NB the .to_string_lossy().to_string() here looks weird but is
// required to convert it from a Cow.
path.extension() path.extension()
.map(|ext| extensions.insert(ext.to_string_lossy().to_string())); .map(|ext| extensions.insert(ext.to_string_lossy().to_string()));
// find the full extension on a file. ie, the tar.gz in foo.tar.gz
path.file_name().map(|file_name| {
file_name
.to_string_lossy()
.split_once('.')
.map(|(_, after)| extensions.insert(after.to_string()))
});
} }
if let Some(file_name) = path.file_name() { if let Some(file_name) = path.file_name() {
// this .to_string_lossy().to_string() is also required
file_names.insert(file_name.to_string_lossy().to_string()); file_names.insert(file_name.to_string_lossy().to_string());
} }
files.insert(path); files.insert(path);
@ -432,24 +449,47 @@ impl DirContents {
self.file_names.contains(name) self.file_names.contains(name)
} }
/// Returns `true` if `has_file_name` succeeds for at least one entry of
/// `names`; an empty slice yields `false`.
pub fn has_any_file_name(&self, names: &[&str]) -> bool {
names.iter().any(|name| self.has_file_name(name))
}
pub fn has_folder(&self, path: &str) -> bool { pub fn has_folder(&self, path: &str) -> bool {
self.folders.contains(Path::new(path)) self.folders.contains(Path::new(path))
} }
/// Returns `true` if `has_folder` succeeds for at least one entry of
/// `paths`; an empty slice yields `false`.
pub fn has_any_folder(&self, paths: &[&str]) -> bool {
paths.iter().any(|path| self.has_folder(path))
}
pub fn has_extension(&self, ext: &str) -> bool { pub fn has_extension(&self, ext: &str) -> bool {
self.extensions.contains(ext) self.extensions.contains(ext)
} }
pub fn has_any_extension(&self, exts: &[&str]) -> bool { pub fn has_any_positive_file_name(&self, names: &[&str]) -> bool {
exts.iter().any(|ext| self.has_extension(ext)) names
.iter()
.any(|name| !name.starts_with('!') && self.has_file_name(name))
}
/// Reports whether at least one "positive" entry of `paths` (one that does
/// not start with `!`) is accepted by `has_folder`.
///
/// Entries with a leading `!` are negative indicators and are skipped here.
pub fn has_any_positive_folder(&self, paths: &[&str]) -> bool {
    paths
        .iter()
        .filter(|candidate| !candidate.starts_with('!'))
        .any(|candidate| self.has_folder(candidate))
}
/// Reports whether at least one "positive" entry of `exts` (one that does
/// not start with `!`) is accepted by `has_extension`.
///
/// Entries with a leading `!` are negative indicators and are skipped here.
pub fn has_any_positive_extension(&self, exts: &[&str]) -> bool {
    exts.iter()
        .filter(|candidate| !candidate.starts_with('!'))
        .any(|candidate| self.has_extension(candidate))
}
/// Reports whether none of the "negative" entries of `names` match.
///
/// A negative entry is written with a leading `!`; the text after the `!`
/// is looked up with `has_file_name`. Returns `false` as soon as one such
/// lookup succeeds, and `true` when there are no negative entries at all.
pub fn has_no_negative_file_name(&self, names: &[&str]) -> bool {
    names
        .iter()
        // Keep only the negated entries, with the `!` marker removed.
        .filter_map(|entry| entry.strip_prefix('!'))
        .all(|excluded| !self.has_file_name(excluded))
}
/// Reports whether none of the "negative" entries of `paths` match.
///
/// A negative entry is written with a leading `!`; the text after the `!`
/// is looked up with `has_folder`. Returns `false` as soon as one such
/// lookup succeeds, and `true` when there are no negative entries at all.
pub fn has_no_negative_folder(&self, paths: &[&str]) -> bool {
    paths
        .iter()
        // Keep only the negated entries, with the `!` marker removed.
        .filter_map(|entry| entry.strip_prefix('!'))
        .all(|excluded| !self.has_folder(excluded))
}
/// Returns `true` when no negated entry of `exts` (one written with a
/// leading `!`) is accepted by `has_extension`; a slice without negated
/// entries therefore yields `true`.
pub fn has_no_negative_extension(&self, exts: &[&str]) -> bool {
!exts
.iter()
// `&ext[1..]` drops the leading `!` before the lookup; the slice is only
// evaluated after `starts_with('!')` has succeeded.
.any(|ext| ext.starts_with('!') && self.has_extension(&ext[1..]))
} }
} }
@ -516,9 +556,16 @@ impl<'a> ScanDir<'a> {
/// based on the current `PathBuf` check to see /// based on the current `PathBuf` check to see
/// if any of this criteria match or exist and returning a boolean /// if any of this criteria match or exist and returning a boolean
pub fn is_match(&self) -> bool { pub fn is_match(&self) -> bool {
self.dir_contents.has_any_extension(self.extensions) // if there exists a file with a file/folder/ext we've said we don't want,
|| self.dir_contents.has_any_folder(self.folders) // fail the match straight away
|| self.dir_contents.has_any_file_name(self.files) self.dir_contents.has_no_negative_extension(self.extensions)
&& self.dir_contents.has_no_negative_file_name(self.files)
&& self.dir_contents.has_no_negative_folder(self.folders)
&& (self
.dir_contents
.has_any_positive_extension(self.extensions)
|| self.dir_contents.has_any_positive_file_name(self.files)
|| self.dir_contents.has_any_positive_folder(self.folders))
} }
} }
@ -726,6 +773,50 @@ mod tests {
.is_match()); .is_match());
node.close()?; node.close()?;
let tarballs = testdir(&["foo.tgz", "foo.tar.gz"])?;
let tarballs_dc = DirContents::from_path(tarballs.path())?;
assert!(ScanDir {
dir_contents: &tarballs_dc,
files: &[],
extensions: &["tar.gz"],
folders: &[],
}
.is_match());
tarballs.close()?;
let dont_match_ext = testdir(&["foo.js", "foo.ts"])?;
let dont_match_ext_dc = DirContents::from_path(dont_match_ext.path())?;
assert!(!ScanDir {
dir_contents: &dont_match_ext_dc,
files: &[],
extensions: &["js", "!notfound", "!ts"],
folders: &[],
}
.is_match());
dont_match_ext.close()?;
let dont_match_file = testdir(&["goodfile", "evilfile"])?;
let dont_match_file_dc = DirContents::from_path(dont_match_file.path())?;
assert!(!ScanDir {
dir_contents: &dont_match_file_dc,
files: &["goodfile", "!notfound", "!evilfile"],
extensions: &[],
folders: &[],
}
.is_match());
dont_match_file.close()?;
let dont_match_folder = testdir(&["gooddir/somefile", "evildir/somefile"])?;
let dont_match_folder_dc = DirContents::from_path(dont_match_folder.path())?;
assert!(!ScanDir {
dir_contents: &dont_match_folder_dc,
files: &[],
extensions: &[],
folders: &["gooddir", "!notfound", "!evildir"],
}
.is_match());
dont_match_folder.close()?;
Ok(()) Ok(())
} }

View File

@ -498,6 +498,8 @@ mod tests {
fn make_known_tempdir(root: &Path) -> io::Result<(TempDir, String)> { fn make_known_tempdir(root: &Path) -> io::Result<(TempDir, String)> {
fs::create_dir_all(root)?; fs::create_dir_all(root)?;
let dir = TempDir::new_in(root)?; let dir = TempDir::new_in(root)?;
// the .to_string_lossy().to_string() here looks weird but is required
// to convert it from a Cow.
let path = dir let path = dir
.path() .path()
.file_name() .file_name()