commit 78251080099b24531f7cfe654b89de3bb6111f15 Author: Audrey Dutcher Date: Fri Mar 7 16:59:05 2025 -0700 Initial commit diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..7865674 --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2024 Audrey Dutcher + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/lib/default.nix b/lib/default.nix new file mode 100644 index 0000000..3d885c7 --- /dev/null +++ b/lib/default.nix @@ -0,0 +1,42 @@ +{ pkgs }: +let + lib = pkgs.lib; + types = { + Module = "Module"; + Class = "Class"; + DataType = "DataType"; + Repo = "Repo"; + RepoAdapter = "RepoAdapter"; + Function = "Function"; + Binding = "Binding"; + FunctionType = "FunctionType"; + }; + addType = arg: ty: arg // { __artinixType = ty; }; + needsName = arg: arg // { __artinixName = null; }; + addName = name: arg: if arg ? __artinixName && arg.__artinixName == null + then arg // { __artinixName = name; } + else arg; + addNames = lib.mapAttrs addName; + moduleExtension = final: prev: (addType (addNames prev) types.Module); + classExtension = final: prev: (addType (needsName prev) types.Class); + repoExtension = class: repoType: final: prev: (addType (needsName (repoType class prev)) types.Repo); + dataTypeExtension = final: prev: (addType (needsName prev) types.DataType) // { __functor = self: class: attrs: attrs // { __artinixRepoType = self; __artinixClass = class; }; }; + adapterExtension = final: prev: (addType (needsName prev) types.RepoAdapter); + functionExtension = functy: final: prev: (addType (needsName (functy prev)) types.Function); + bindingExtension = class: final: prev: (addType (needsName prev) types.Binding) // { __artinixClass = class; }; + functionTypeExtension = typeCons: final: prev: (addType prev types.FunctionType) // { __functor = self: args: (typeCons args) // { __artinixFunctionType = self; }; }; + constructor = extension: mod: (lib.makeExtensible mod).extend extension; +in +rec { + mkModule = constructor moduleExtension; + mkClass = constructor classExtension; + mkDataType = constructor dataTypeExtension; + mkRepo = cls: repoty: constructor (repoExtension cls repoty); + mkAdapter = constructor adapterExtension; + mkFuncType = typeCons: constructor (functionTypeExtension typeCons); + mkFunc = functy: constructor (functionExtension functy); + mkBinding = cls: constructor (bindingExtension cls); + mkView = cls: functy: args: mkBinding cls { func = mkFunc functy (builtins.removeAttrs args ["inputs" "outputs"]); inherit (args) inputs outputs; }; + mkRuntime = ({nativeFunctionTypes, nativeDataFormats, linker}: ); + mkProgram = ({runtime, bindings, storageProviders}: ); +} diff --git a/module.nix b/module.nix new file mode 100644 index 0000000..7189ff1 --- /dev/null +++ b/module.nix @@ -0,0 +1,119 @@ +{ + pkgs, + lib, + stdlib, + jqlib, + pythonlib, # TODO python env... + processlib, +}: +with lib; +mkModule (self: { + # A class is basically an identifier namespace. + class1 = mkClass {}; + # Another class of entities. May have overlapping identifiers with class1 but there is no correlation + class2 = mkClass {}; + + # A repository is an attribute of an entity of a given class. It is the core data storage abstraction. + repo1 = mkRepo self.class1 stdlib.dtypes.blob {}; + repo2 = mkRepo self.class1 stdlib.dtypes.json { + schema = with stdlib.dtypes.json.schema; dictKeysComplete { related = int; subkey = dictOf str (listOf str); }; + }; + # A repository can be declared on each class + repo3 = mkRepo self.class2 stdlib.dtypes.json {}; + repo4 = mkRepo self.class2 stdlib.dtypes.blob {}; + + # A view is a derived repository. There are many ways to describe a transformation between inputs and outputs. + # Here we use jq to query over an input repository. We also cast it to a foreign key, which is the id of an entity of a given class. + relation = mkView self.class1 jqlib.expr { + inputs.input.repo = self.repo2; + query = "$input.related"; + cast = stdlib.dtypes.foreignKeyOf self.class2; + }; + # Here, we use the previous foreign key repository to create a new repository on class1 which contains data from repo3 (declared on class2) + # by specifying that the this repository should be accessed through a foreign entity. + repoRelated = mkRelated self.class1 self.relation self.repo3 {}; + + # The declaration of a view is a shorthand for declaring a function and binding it to some input and output repositories. + # Here we do it in long form + func1 = mkFunc pythonlib.func { + inputs.one.dtype = stdlib.dtypes.json; + inputs.two.dtype = stdlib.dtypes.blob; + outputs.return.dtype = stdlib.dtypes.json; + + module = pkgs.writeText "func1.py" '' + def func1(one, two): + return { + "one": one["yay"], + "two": two.read(), + } + ''; + function = "func1"; + }; + bind1 = mkBinding self.class1 { + func = self.func1; + inputs.one = self.repo2; + inputs.two = self.repo1; + outputs.return = self.repoRelated; + }; + + streamFunc = mkFunc processlib.processFunc { + inputs.stdin = { + dtype = stdlib.dtypes.json; + format = processlib.formats.streamOf processlib.formats.yaml; + }; + outputs.out = { + type = stdlib.dtypes.seqOf stdlib.dtypes.blob; + format = processlib.formats.filepathOf (processlib.formats.watchdirOf processlib.formats.file); + }; + + executable = pkgs.writeShellScript "streamFunc.sh" '' + id=0 + grep whatever | while read -r line; do + md5sum >$out/$id <<<$line + id=$((id + 1)) + done + ''; + }; + + tupleFunc = mkFunc processlib.processFunc { + inputs.stdin = { + dtype = stdlib.dtypes.json; + format = processlib.formats.streamOf processlib.formats.yaml; + }; + outputs.out = { + type = stdlib.dtypes.seqOf (stdlib.dtypes.tupleOf { data = stdlib.dtypes.blob; metadata = stdlib.dtypes.json; }); + format = processlib.formats.filepathOf (processlib.formats.watchdirOf (processlib.formats.tupleDirOf { + data = processlib.formats.file; + metadata = processlib.formats.yaml; + })); + }; + + executable = pkgs.writeShellScript "tupleFunc.sh" '' + id=0 + grep whatever | while read -r line; do + mkdir -p $out/$id + md5sum >$out/$id/data <<<$line + cat >$out/$id/metadata < Iterable[str]: + ... + + def has(self, ident: str) -> bool: + ... + + +class ArtinixFormat(Protocol): + def get(self, storage: ArtinixStorage, ident: str) -> Any: + ... + + def reader(self, storage: ArtinixStorage, ident: str) -> Any | None: + ... + + def put(self, storage: ArtinixStorage, ident: str, data: Any) -> None: + ... + + def writer(self, storage: ArtinixStorage, ident: str) -> Any | None: + ... + + def filter(self, storage: type[ArtinixStorage]) -> bool: + ... + +class ArtinixFunction(Protocol): + def __call__(self, inputs: dict[str, Any]) -> dict[str, Any]: + ... + + input_formats: dict[str, ArtinixFormat] + output_formats: dict[str, ArtinixFormat] + +class ArtinixPythonFunction: + def __init__(self, module_path: str, func_name: str, input_formats: dict[str, ArtinixFormat], output_formats: dict[str, ArtinixFormat]): + self.module_path = module_path + self.func_name = func_name + self.input_formats = input_formats + self.output_formats = output_formats + + module = PYTHON_MODULE_CACHE.get(self.module_path, None) + if module is None: + spec = importlib.util.spec_from_file_location(module_path, module_path) + if spec is None: + raise FileNotFoundError(f"{module_path} not found") + assert spec.loader is not None, "What does it even mean when spec.loader is None?" + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + PYTHON_MODULE_CACHE[module_path] = module + self.module = module + self.function = getattr(module, self.func_name) + + def __call__(self, inputs: dict[str, Any]) -> dict[str, Any]: + return {"return": self.function(**inputs)} + +@dataclass(frozen=True) +class ArtinixEntityClass: + name: str + +@dataclass(frozen=True) +class ArtinixEntity: + ident: str + klass: ArtinixEntityClass + + +class ArtinixRepository: + def __init__(self, klass: ArtinixEntityClass, name: str, dtype: str): + self.klass = klass + self.name = name + self.dtype = dtype + +class ArtinixBinding: + def __init__(self, klass: ArtinixEntityClass, function: ArtinixFunction, inputs: dict[str, ArtinixRepository], outputs: dict[str, ArtinixRepository]): + self.klass = klass + self.function = function + self.inputs = inputs + self.outputs = outputs + + assert inputs, "Binding must have inputs so we know when to trigger it" + +class ArtinixPythonRuntime: + def __init__(self, + *, + argv0: str, + default_setup: bool, + manual_setup: bool, + bindings: list[ArtinixBinding], + datastore: dict[ArtinixRepository, ArtinixStorage], + ): + + self.bindings = bindings + self.datastore = datastore + + self.parser = argparse.ArgumentParser(argv0) + subparsers = self.parser.add_subparsers() + + if manual_setup: + args_setup = subparsers.add_parser("setup") + args_setup.set_defaults(func=self.func_setup) + + if default_setup: + args_run = subparsers.add_parser("run") + args_run.set_defaults(func=self.func_run) + + def spawn_entity(self, klass: ArtinixEntityClass) -> ArtinixEntity: + return ArtinixEntity(ident = ''.join(random.choice(string.ascii_lowercase) for _ in range(8)), klass=klass) + + def binding_ready(self, binding: ArtinixBinding) -> list[ArtinixEntity]: + ready = None + for input_repo in binding.inputs.values(): + storage = self.datastore[input_repo] + if ready is None: + ready = set(storage.members()) + else: + ready.intersection_update(storage.members()) + + assert ready is not None + for output_repo in binding.outputs.values(): + storage = self.datastore[output_repo] + ready.difference_update(storage.members()) + + return [ArtinixEntity(ident=ident, klass=binding.klass) for ident in ready] + + def binding_exec(self, binding: ArtinixBinding, entity: ArtinixEntity): + inputs = {input_name: binding.function.input_formats[input_name].get(self.datastore[input_repo], entity.ident) for input_name, input_repo in binding.inputs.items()} + outputs = binding.function(inputs) + for output_name, output_data in outputs.items(): + binding.function.output_formats[output_name].put(self.datastore[binding.outputs[output_name]], entity.ident, output_data) + + def main(self, args: list[str]): + parsed = self.parser.parse_args(args) + func_args = vars(parsed) + func = func_args.pop("func") + return func(**func_args) + + def func_setup(self): + raise NotImplementedError() + + def func_run(self): + raise NotImplementedError() diff --git a/python/src/artinix_python/formats.py b/python/src/artinix_python/formats.py new file mode 100644 index 0000000..6d6c0f7 --- /dev/null +++ b/python/src/artinix_python/formats.py @@ -0,0 +1,100 @@ +from __future__ import annotations +from typing import TYPE_CHECKING, Any +from io import BytesIO +import json + +from .storage import * + +if TYPE_CHECKING: + from . import ArtinixStorage + +__all__ = ('ArtinixJsonObjectFormat', 'ArtinixBlobStreamFormat', 'ArtinixBlobStringFormat') + + +class ArtinixJsonObjectFormat: + def filter(self, storage: type[ArtinixStorage]) -> bool: + return issubclass(storage, (ArtinixInMemoryStorage, ArtinixFileStorage)) + + def get(self, storage: ArtinixStorage, ident: str): + if isinstance(storage, ArtinixInMemoryStorage): + data = storage.data[ident] + assert isinstance(data, (int, float, bool, str, list, dict)) + return data + if isinstance(storage, ArtinixFileStorage): + with open(storage.basedir / ident, 'r') as fp: + return json.load(fp) + raise NotImplementedError("Forgot a case") + + def put(self, storage: ArtinixStorage, ident: str, data: Any) -> None: + assert isinstance(data, (int, float, bool, str, list, dict)) + if isinstance(storage, ArtinixInMemoryStorage): + storage.data[ident] = data + if isinstance(storage, ArtinixFileStorage): + with open(storage.basedir / ident, 'w') as fp: + return json.dump(data, fp) + raise NotImplementedError("Forgot a case") + + def writer(self, storage, ident): + return None + + def reader(self, storage, ident): + return None + +class ArtinixBlobStreamFormat: + def filter(self, storage: type[ArtinixStorage]): + return issubclass(storage, (ArtinixInMemoryStorage, ArtinixFileStorage)) + + def get(self, storage, ident): + raise TypeError("Please use reader") + + def put(self, storage, ident): + raise TypeError("Please use writer") + + def reader(self, storage: ArtinixStorage, ident: str): + if isinstance(storage, ArtinixInMemoryStorage): + data = storage.data[ident] + assert isinstance(data, BytesIO) + return data + if isinstance(storage, ArtinixFileStorage): + return open(storage.basedir / ident, 'rb') + raise NotImplementedError("Forgot a case") + + def writer(self, storage: ArtinixStorage, ident: str): + if isinstance(storage, ArtinixInMemoryStorage): + data = storage.data[ident] + assert isinstance(data, BytesIO) + return data + if isinstance(storage, ArtinixFileStorage): + return open(storage.basedir / ident, 'wb') + raise NotImplementedError("Forgot a case") + +class ArtinixBlobStringFormat: + def filter(self, storage: type[ArtinixStorage]) -> bool: + return issubclass(storage, (ArtinixInMemoryStorage, ArtinixFileStorage)) + + def get(self, storage: ArtinixStorage, ident: str): + if isinstance(storage, ArtinixInMemoryStorage): + data = storage.data[ident] + assert isinstance(data, BytesIO) + return data.getvalue() + if isinstance(storage, ArtinixFileStorage): + with open(storage.basedir / ident, 'rb') as fp: + return fp.read() + raise NotImplementedError("Forgot a case") + + def put(self, storage: ArtinixStorage, ident: str, data: Any) -> None: + assert isinstance(data, bytes) + if isinstance(storage, ArtinixInMemoryStorage): + storage.data[ident] = BytesIO(data) + return + if isinstance(storage, ArtinixFileStorage): + with open(storage.basedir / ident, 'wb') as fp: + fp.write(data) + return + raise NotImplementedError("Forgot a case") + + def writer(self, storage, ident): + return None + + def reader(self, storage, ident): + return None diff --git a/python/src/artinix_python/storage.py b/python/src/artinix_python/storage.py new file mode 100644 index 0000000..212a444 --- /dev/null +++ b/python/src/artinix_python/storage.py @@ -0,0 +1,24 @@ +from typing import Any, Iterable +from pathlib import Path + +__all__ = ('ArtinixInMemoryStorage', 'ArtinixFileStorage') + +class ArtinixInMemoryStorage: + def __init__(self): + self.data: dict[str, Any] = {} + + def members(self) -> Iterable[str]: + return self.data.keys() + + def has(self, ident: str) -> bool: + return ident in self.data + +class ArtinixFileStorage: + def __init__(self, basedir: Path): + self.basedir = basedir + + def members(self) -> Iterable[str]: + return [x.name for x in self.basedir.iterdir()] + + def has(self, ident: str) -> bool: + return (self.basedir / ident).exists() diff --git a/python/src/driver.py.template b/python/src/driver.py.template new file mode 100644 index 0000000..caa54a3 --- /dev/null +++ b/python/src/driver.py.template @@ -0,0 +1,55 @@ +#!@interpreter@ + +import sys + +from artinix_python import * + +# +# Entity Classes +# + +entity_classes = [] +@entityClassSetup@ + +# +# Repositories +# + +repositories = [] +@repositorySetup@ + +# +# Functions +# + +functions = [] +@functionSetup@ + +# +# Bindings +# + +bindings = [] +@bindingSetup@ + +# +# Datastore setup +# + +datastore = {} +@datastoreSetup@ + +# +# Go! +# + +runtime = ArtinixPythonRuntime( + argv0=sys.argv[0], + default_setup = @allowDefault@, + manual_setup = @allowManual@, + bindings=bindings, + datastore=datastore, +) + +if __name__ == '__main__': + runtime.main(sys.argv[1:]) diff --git a/stdlib.nix b/stdlib.nix new file mode 100644 index 0000000..1da159d --- /dev/null +++ b/stdlib.nix @@ -0,0 +1,25 @@ +{ + lib, +}: +with lib; +mkModule (self: { + dtypes = mkModule (self': { + blob = mkRepoType {}; + json = mkRepoType { + schema = { + int = { type = "int"; }; + str = { type = "str"; }; + listOf = ety: { type = "list"; inherit ety; }; + dictOf = kty: vty: { type = "dict"; inherit kty vty; }; + }; + }; + filesystem = mkRepoType {}; + foreignKeyOf = foreignClass: mkRepoType { class = foreignClass; }; + seqOf = ty: mkRepoType { inherit ty; }; + tupleOf = children: mkRepoType { inherit children; }; + }); + repoAdapters = mkModule (self': { + seqToSpawn = mkAdapter { /* TODO */ }; + tupleToRepos = mkAdapter { /* TODO */ }; + }); +})