120 lines
4.4 KiB
Nix
120 lines
4.4 KiB
Nix
{
|
|
pkgs,
|
|
lib,
|
|
stdlib,
|
|
jqlib,
|
|
pythonlib, # TODO python env...
|
|
processlib,
|
|
}:
|
|
with lib;
|
|
mkModule (self: {
|
|
# A class is, in essence, a namespace for entity identifiers.
class1 = mkClass { };

# A second class of entities. Its identifiers may coincide with identifiers
# used in class1, but equal identifiers in the two classes are unrelated.
class2 = mkClass { };
|
|
|
|
# A repository is an attribute attached to the entities of one class. It is
# the core data-storage abstraction.

# Blob-typed repository on class1.
repo1 = mkRepo self.class1 stdlib.dtypes.blob { };

# JSON-typed repository on class1, constrained by a schema with an integer
# `related` key and a `subkey` dict mapping strings to lists of strings.
repo2 = mkRepo self.class1 stdlib.dtypes.json {
  schema =
    with stdlib.dtypes.json.schema;
    dictKeysComplete {
      related = int;
      subkey = dictOf str (listOf str);
    };
};

# Repositories can be declared on any class; these two live on class2.
repo3 = mkRepo self.class2 stdlib.dtypes.json { };
repo4 = mkRepo self.class2 stdlib.dtypes.blob { };
|
|
|
|
# A view is a derived repository. A transformation from inputs to outputs can
# be described in many ways; this one runs a jq expression over an input
# repository (repo2) and casts the result to a foreign key, i.e. the id of an
# entity of the given class (class2).
relation = mkView self.class1 jqlib.expr {
  inputs = {
    input = {
      repo = self.repo2;
    };
  };
  query = "$input.related";
  cast = stdlib.dtypes.foreignKeyOf self.class2;
};
|
|
# Use the foreign-key repository declared above (`relation`) to expose, on
# class1, the data held in repo3 (which is declared on class2): the new
# repository is accessed through the foreign entity that the key points to.
repoRelated = mkRelated self.class1 self.relation self.repo3 { };
|
|
|
|
# Declaring a view is shorthand for declaring a function and then binding it
# to input and output repositories. Here the long form is used: this block
# declares the function only.
#
# func1 is a Python function with two inputs (`one`: json, `two`: blob) and a
# single json output named `return`.
func1 = mkFunc pythonlib.func {
  # Name of the callable to look up inside `module`.
  function = "func1";

  inputs = {
    one = {
      dtype = stdlib.dtypes.json;
    };
    two = {
      dtype = stdlib.dtypes.blob;
    };
  };

  outputs = {
    return = {
      dtype = stdlib.dtypes.json;
    };
  };

  # Python source implementing the function.
  # NOTE(review): the body reads one["yay"]; confirm that whatever repository
  # is bound to `one` actually provides that key.
  module = pkgs.writeText "func1.py" ''
    def func1(one, two):
        return {
            "one": one["yay"],
            "two": two.read(),
        }
  '';
};
|
|
# Bind func1 on class1: `one` is fed from repo2, `two` from repo1, and the
# function's `return` output is stored in repoRelated.
bind1 = mkBinding self.class1 {
  func = self.func1;

  inputs = {
    one = self.repo2;
    two = self.repo1;
  };

  outputs = {
    return = self.repoRelated;
  };
};
|
|
|
|
# A function implemented as an external process.
#
# Input `stdin`: json data, delivered in the `streamOf yaml` format.
# Output `out`:  a sequence of blobs, collected via a file path to a watched
#                directory of plain files.
#
# The script filters its standard input with grep and, for every matching
# line, writes the md5sum of that line to a new file "$out/<n>", where <n>
# counts up from 0.
# NOTE(review): `$out` is presumably exported by processlib and points at the
# watched output directory -- confirm.
streamFunc = mkFunc processlib.processFunc {
  inputs.stdin = {
    dtype = stdlib.dtypes.json;
    format = processlib.formats.streamOf processlib.formats.yaml;
  };

  outputs.out = {
    # Keyed `dtype`, like every other input/output declaration in this module.
    dtype = stdlib.dtypes.seqOf stdlib.dtypes.blob;
    format = processlib.formats.filepathOf (processlib.formats.watchdirOf processlib.formats.file);
  };

  executable = pkgs.writeShellScript "streamFunc.sh" ''
    id=0
    grep whatever | while read -r line; do
      # Expansions are quoted so that whitespace in the output path or in the
      # line cannot split the redirection target or alter the hashed text.
      md5sum >"$out/$id" <<<"$line"
      id=$((id + 1))
    done
  '';
};
|
|
|
|
# A process-backed function whose output elements are tuples.
#
# Input `stdin`: json data, delivered in the `streamOf yaml` format.
# Output `out`:  a sequence of { data: blob, metadata: json } tuples. On disk
#                each tuple is a directory inside a watched directory, holding
#                a plain `data` file and a YAML `metadata` file.
#
# For every line of standard input that grep lets through, the script creates
# "$out/<n>/" (<n> counts up from 0), stores the md5sum of the line in `data`,
# and writes `metadata` with two keys: `parent` (the value of $localKey) and
# `filesize` (byte count of `data`).
# NOTE(review): `$out` and `$localKey` are presumably exported by processlib
# (output directory and the id of the entity being processed) -- confirm.
tupleFunc = mkFunc processlib.processFunc {
  inputs.stdin = {
    dtype = stdlib.dtypes.json;
    format = processlib.formats.streamOf processlib.formats.yaml;
  };

  outputs.out = {
    # Keyed `dtype`, like every other input/output declaration in this module.
    dtype = stdlib.dtypes.seqOf (stdlib.dtypes.tupleOf {
      data = stdlib.dtypes.blob;
      metadata = stdlib.dtypes.json;
    });
    format = processlib.formats.filepathOf (processlib.formats.watchdirOf (processlib.formats.tupleDirOf {
      data = processlib.formats.file;
      metadata = processlib.formats.yaml;
    }));
  };

  executable = pkgs.writeShellScript "tupleFunc.sh" ''
    id=0
    grep whatever | while read -r line; do
      mkdir -p "$out/$id"
      md5sum >"$out/$id/data" <<<"$line"
      # wc reads the file via stdin so it prints only the byte count; given a
      # file argument it would also print the path, and `filesize` would no
      # longer be a number. printf is used instead of a here-document so the
      # script does not depend on a terminator sitting at column 0.
      printf 'parent: %s\nfilesize: %s\n' \
        "$localKey" "$(wc -c <"$out/$id/data")" >"$out/$id/metadata"
      id=$((id + 1))
    done
  '';
};
|
|
|
|
# Bind tupleFunc on class1: `stdin` is fed from repo2, and the `out` sequence
# is routed through two adapters. `tupleToRepos` maps each tuple field to a
# repository (data -> repo4, metadata -> repo3, both declared on class2), and
# `seqToSpawn` wraps that mapping for the sequence output.
# NOTE(review): presumably every sequence element spawns a new class2 entity
# whose repo4/repo3 receive the tuple fields -- confirm against stdlib.
bind2 = mkBinding self.class1 {
  func = self.tupleFunc;
  inputs.stdin = self.repo2;
  # `outputs` (plural), matching bind1 and the `outputs` key of the function
  # declarations.
  outputs.out = stdlib.repoAdapters.seqToSpawn (stdlib.repoAdapters.tupleToRepos {
    data = self.repo4;
    metadata = self.repo3;
  });
};
|
|
|
|
# View on class2 that extracts the `parent` field from repo3 with a jq
# expression and casts it to a foreign key into class1.
parentKey = mkView self.class2 jqlib.expr {
  inputs = {
    input = {
      repo = self.repo3;
    };
  };
  query = "$input.parent";
  cast = stdlib.dtypes.foreignKeyOf self.class1;
};
|
|
})
|