artinix/module.nix

120 lines
4.4 KiB
Nix

{
# Package set; this module uses it for `writeText` and `writeShellScript`.
pkgs,
# Brought into scope by `with lib;` below. The builders used in this module
# (mkModule, mkClass, mkRepo, mkView, mkRelated, mkFunc, mkBinding) are not
# bound anywhere else in this file, so they resolve through it.
lib,
# Supplies `dtypes` (blob, json, seqOf, tupleOf, foreignKeyOf) and `repoAdapters`.
stdlib,
# Supplies `expr`, the function kind used by the jq-based views.
jqlib,
# Supplies `func`, the function kind used by the Python function.
pythonlib, # TODO python env...
# Supplies `processFunc` and the `formats` used by the shell-script functions.
processlib,
}:
with lib;
mkModule (self: {
# Classes: each class is a namespace of entity identifiers.
class1 = mkClass { };
# A second, independent class. Its identifiers may coincide with class1's, but a shared identifier implies no relationship.
class2 = mkClass { };
# Repositories: a repository is an attribute of every entity of one class, and is the core data storage abstraction.
repo1 = mkRepo self.class1 stdlib.dtypes.blob { };
repo2 = mkRepo self.class1 stdlib.dtypes.json {
  schema = with stdlib.dtypes.json.schema;
    dictKeysComplete {
      subkey = dictOf str (listOf str);
      related = int;
    };
};
# Repositories can be declared on any class; these two live on class2.
repo3 = mkRepo self.class2 stdlib.dtypes.json { };
repo4 = mkRepo self.class2 stdlib.dtypes.blob { };
# A view is a derived repository; the transformation from inputs to outputs can be described in several ways.
# This one runs a jq query over repo2 and casts the result to a foreign key, i.e. the id of an entity of class2.
relation = mkView self.class1 jqlib.expr {
  inputs = {
    input = { repo = self.repo2; };
  };
  query = "$input.related";
  cast = stdlib.dtypes.foreignKeyOf self.class2;
};
# Using the foreign-key repository above, expose repo3 (declared on class2) as a repository on class1:
# reads and writes go through the foreign entity that `relation` points at.
repoRelated = mkRelated self.class1 self.relation self.repo3 { };
# Declaring a view is shorthand for declaring a function and binding it to input and output repositories.
# Here the same thing is done in long form: `func1` declares the function, `bind1` binds it.
#
# func1 is a Python function with two inputs (`one`: json, `two`: blob) and one json output named `return`.
# `module` is the Python source file and `function` names the callable inside it.
# NOTE(review): the body reads one["yay"], but where this function is bound in this file, `one` comes from
# repo2, whose schema lists only `related` and `subkey` — confirm the intended key.
func1 = mkFunc pythonlib.func {
  inputs.one.dtype = stdlib.dtypes.json;
  inputs.two.dtype = stdlib.dtypes.blob;
  outputs.return.dtype = stdlib.dtypes.json;
  # The Python body must keep its own indentation; Nix strips only the common leading whitespace of the
  # indented string, so `def` ends up at column 0 and the nested lines stay nested.
  module = pkgs.writeText "func1.py" ''
    def func1(one, two):
        return {
            "one": one["yay"],
            "two": two.read(),
        }
  '';
  function = "func1";
};
# Bind func1 on class1: its inputs `one` and `two` read from repo2 and repo1,
# and its `return` output is written to repoRelated.
bind1 = mkBinding self.class1 {
  func = self.func1;
  inputs = {
    one = self.repo2;
    two = self.repo1;
  };
  outputs = { return = self.repoRelated; };
};
# A process-backed function: json arrives on stdin as a stream of YAML, and the output is a sequence of blobs.
# The script writes one file per matching input line, named by a running counter, under "$out".
# NOTE(review): "$out" is presumably the watched output directory supplied by the process runtime
# (the output format is filepathOf (watchdirOf file)) — confirm against processlib.
streamFunc = mkFunc processlib.processFunc {
  inputs.stdin = {
    dtype = stdlib.dtypes.json;
    format = processlib.formats.streamOf processlib.formats.yaml;
  };
  outputs.out = {
    # `dtype`, not `type`: every other port declaration in this module uses the `dtype` key.
    dtype = stdlib.dtypes.seqOf stdlib.dtypes.blob;
    format = processlib.formats.filepathOf (processlib.formats.watchdirOf processlib.formats.file);
  };
  # Expansions are quoted so an output path containing whitespace does not produce an
  # "ambiguous redirect" and the here-string is passed through unchanged.
  executable = pkgs.writeShellScript "streamFunc.sh" ''
    id=0
    grep whatever | while read -r line; do
      md5sum >"$out/$id" <<<"$line"
      id=$((id + 1))
    done
  '';
};
# Like streamFunc, but each emitted element is a tuple { data: blob, metadata: json }.
# On disk every element is a directory "$out/<n>" holding a `data` file and a YAML `metadata` file.
# NOTE(review): "$out" and "$localKey" are not defined in this script; presumably the process runtime
# exports them (output directory and the key of the entity being processed) — confirm against processlib.
tupleFunc = mkFunc processlib.processFunc {
  inputs.stdin = {
    dtype = stdlib.dtypes.json;
    format = processlib.formats.streamOf processlib.formats.yaml;
  };
  outputs.out = {
    # `dtype`, not `type`: every other port declaration in this module uses the `dtype` key.
    dtype = stdlib.dtypes.seqOf (stdlib.dtypes.tupleOf {
      data = stdlib.dtypes.blob;
      metadata = stdlib.dtypes.json;
    });
    format = processlib.formats.filepathOf (processlib.formats.watchdirOf (processlib.formats.tupleDirOf {
      data = processlib.formats.file;
      metadata = processlib.formats.yaml;
    }));
  };
  # Script notes:
  #  - `wc -c <file` prints only the byte count; `wc -c file` would append the file path to `filesize`.
  #  - printf replaces the heredoc so the result does not depend on the terminator landing at column 0
  #    after Nix strips the indented string's common indentation.
  #  - all expansions are quoted so paths containing whitespace work.
  executable = pkgs.writeShellScript "tupleFunc.sh" ''
    id=0
    grep whatever | while read -r line; do
      mkdir -p "$out/$id"
      md5sum >"$out/$id/data" <<<"$line"
      printf 'parent: %s\nfilesize: %s\n' "$localKey" "$(wc -c <"$out/$id/data")" >"$out/$id/metadata"
      id=$((id + 1))
    done
  '';
};
# Bind tupleFunc on class1: stdin is fed from repo2, and the `out` sequence of tuples is routed through
# repo adapters so that each tuple's `data` goes to repo4 and its `metadata` to repo3 (both on class2).
bind2 = mkBinding self.class1 {
  func = self.tupleFunc;
  inputs.stdin = self.repo2;
  # `outputs`, not `output`: matches the key used by bind1 and the `outputs.out` port declared on tupleFunc.
  outputs.out = stdlib.repoAdapters.seqToSpawn (stdlib.repoAdapters.tupleToRepos {
    data = self.repo4;
    metadata = self.repo3;
  });
};
# A view on class2: run a jq query for the `parent` field of repo3 and cast the result
# to a foreign key referring to an entity of class1.
parentKey = mkView self.class2 jqlib.expr {
  inputs = {
    input = { repo = self.repo3; };
  };
  query = "$input.parent";
  cast = stdlib.dtypes.foreignKeyOf self.class1;
};
})