Skip to content

Commit ac5a83e

Browse files
committed
Refactor import statements to use full module paths and add new pipeline and workflow utilities
1 parent aea8dc9 commit ac5a83e

File tree

10 files changed

+362
-10
lines changed

10 files changed

+362
-10
lines changed

policy_trans/carminati.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from policy import Policy
1+
from policy_trans.policy import Policy
22

33

44
class Carminati(Policy):

policy_trans/cheng.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from policy import Policy
1+
from policy_trans.policy import Policy
22
from typing import Any
33
from enum import Enum
44
from dataclasses import dataclass

policy_trans/cheng_test.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from enum import Enum
2+
3+
4+
class Wildcard(Enum):
    """Wildcard tokens used in policy patterns.

    Values are the literal symbols; presumably regex-style quantifiers
    (star/plus/question) — confirm against their use in cheng.py.
    """

    STAR = "*"
    PLUS = "+"
    QUESTION = "?"
8+
9+
10+
class Connective(Enum):
    """Logical connectives for combining policy conditions."""

    AND = "AND"
    OR = "OR"

policy_trans/crampton.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from policy import Policy
1+
from policy_trans.policy import Policy
22

33

44
class Crampton(Policy):

policy_trans/datalog.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import json
22
import re
3-
from policy import Policy
4-
from carminati import Carminati
5-
from cheng import (
3+
from policy_trans.policy import Policy
4+
from policy_trans.carminati import Carminati
5+
from policy_trans.cheng import (
66
Cheng,
77
Connective,
88
StartingNode,
@@ -17,8 +17,8 @@
1717
SystemPolicyForUser,
1818
SystemPolicyForResource,
1919
)
20-
from crampton import Crampton
21-
from fong import Fong
20+
from policy_trans.crampton import Crampton
21+
from policy_trans.fong import Fong
2222

2323

2424
class Datalog(Policy):

policy_trans/fong.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from policy import Policy
1+
from policy_trans.policy import Policy
22

33

44
class Fong(Policy):

policy_trans/test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datalog import Datalog
1+
from policy_trans.datalog import Datalog
22

33

44
EXAMPLE_DATALOG = """{

utils/pipline.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
from typing import Protocol, Callable, Any
2+
import json
3+
from pathlib import Path
4+
import os
5+
import re
6+
import logging
7+
import pandas as pd
8+
from pandas import DataFrame
9+
10+
# Row-set shape passed between pipeline stages: a list of JSON-like records.
# Uses the PEP 695 `type` alias statement, so this file requires Python 3.12+.
type Data = list[dict[str, Any]]
11+
12+
13+
# === Interfaces ===
class DataLoader(Protocol):
    """Structural interface: any object with ``load() -> Data`` qualifies."""

    def load(self) -> Data: ...
16+
17+
18+
class Transformer(Protocol):
    """Structural interface for a stage mapping one Data row-set to another."""

    def transform(self, data: Data) -> Data: ...
20+
21+
22+
class Exporter(Protocol):
    """Structural interface for a sink that persists the final Data row-set."""

    def export(self, data: Data) -> None: ...
24+
25+
26+
# === Concrete implementations ===
class InMemoryLoader:
    """Loads policy datasets from disk into per-file pandas DataFrames.

    NOTE(review): despite the name, both methods read from the filesystem,
    and the class does not define ``load()``, so it does not satisfy the
    DataLoader protocol even though main() wires it in as the pipeline's
    loader (DataPipeline.run calls ``loader.load()``) — confirm intent.
    """

    # Matches one <Policy ...> ... </Policy> element, non-greedy across
    # newlines. Hoisted to class level so it is compiled once instead of
    # once per file inside the loop.
    _POLICY_RE = re.compile(r"<Policy\s[^>]*>[\s\S]*?<\/Policy>")

    def load_nl_dataset(self, dir_path: Path) -> dict[str, DataFrame]:
        """Load every .jsonl file under *dir_path* into a DataFrame.

        Returns a dict keyed by file name. Non-.jsonl entries are skipped
        with a warning; unreadable files are logged and skipped so a single
        bad file cannot abort the whole scan (best-effort by design).
        """
        datasets: dict[str, DataFrame] = {}
        for file_name in sorted(os.listdir(dir_path)):
            file_path = dir_path / file_name
            try:
                if file_name.endswith(".jsonl"):
                    df = pd.read_json(str(file_path), lines=True)
                    datasets[file_name] = df
                    # Lazy %-style args avoid f-string work when INFO is off.
                    logging.info("Loaded %s with shape: %s", file_name, df.shape)
                else:
                    logging.warning(
                        "Unsupported file format for %s, skipping.", file_name
                    )
                    continue
            except Exception as e:
                # Deliberate best-effort: record the failure, keep scanning.
                logging.error("Error loading %s: %s", file_name, e)
        return datasets

    def load_xacml_dataset(self, dir_path: Path) -> dict[str, DataFrame]:
        """Extract <Policy> elements from every .xml file under *dir_path*.

        Returns a dict keyed by file name; each value is a one-column
        DataFrame ("policy") of raw XACML policy strings. Non-.xml entries
        are skipped with a warning; unreadable files are logged and skipped.
        """
        datasets: dict[str, DataFrame] = {}
        for file_name in sorted(os.listdir(dir_path)):
            file_path = dir_path / file_name
            try:
                if file_name.endswith(".xml"):
                    # Read then close promptly; the regex scan does not need
                    # the file handle.
                    with open(file_path, "r", encoding="utf-8") as file:
                        xacml_content = file.read()
                    policies = self._POLICY_RE.findall(xacml_content)
                    datasets[file_name] = pd.DataFrame({"policy": policies})
                    logging.info(
                        "Loaded %s with %d policies.", file_name, len(policies)
                    )
                else:
                    logging.warning(
                        "Unsupported file format for %s, skipping.", file_name
                    )
                    continue
            except Exception as e:
                # Deliberate best-effort: record the failure, keep scanning.
                logging.error("Error loading %s: %s", file_name, e)
        return datasets
70+
71+
72+
class CleanMissingFields:
    """Transformer that drops rows whose target field is absent or None.

    The field name defaults to "age" (previously hard-coded), so existing
    zero-argument construction keeps its behavior.
    """

    def __init__(self, field: str = "age") -> None:
        # Key whose absence (or None value) marks a row as incomplete.
        self.field = field

    def transform(self, data: "Data") -> "Data":
        """Return only the rows where ``self.field`` exists and is not None.

        Uses dict.get so a row missing the key entirely is filtered out
        instead of raising KeyError (the original indexed row["age"]
        directly and crashed on such rows).
        """
        return [row for row in data if row.get(self.field) is not None]
75+
76+
77+
class JSONExporter:
    """Exporter that writes the row-set to *filename* as pretty-printed JSON."""

    def __init__(self, filename: str):
        # Destination path; parent directory must already exist.
        self.filename = filename

    def export(self, data: "Data") -> None:
        """Serialize *data* to ``self.filename`` as UTF-8 JSON (indent=2).

        Explicit encoding avoids the platform-default codec (e.g. cp1252 on
        Windows) corrupting non-ASCII text; ensure_ascii=False keeps that
        text human-readable in the output file.
        """
        with open(self.filename, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
84+
85+
86+
# === Pipeline ===
class DataPipeline:
    """Orchestrates one load → transform → export pass over injected stages."""

    def __init__(
        self, loader: DataLoader, transformer: Transformer, exporter: Exporter
    ):
        """Store the three pipeline stages; no work happens until run()."""
        self.loader = loader
        self.transformer = transformer
        self.exporter = exporter

    def run(self) -> None:
        """Load the raw rows, transform them, and hand the result to the exporter."""
        raw = self.loader.load()
        self.exporter.export(self.transformer.transform(raw))
99+
100+
101+
# === Simple DI container ===
class Container:
    """Minimal service locator: name → factory, with optional singleton caching."""

    def __init__(self) -> None:
        # name → (factory, is_singleton)
        self._factories: dict[str, tuple[Callable[[], Any], bool]] = {}
        # instances already built for singleton registrations
        self._cache: dict[str, Any] = {}

    def register(
        self, name: str, provider: Callable[[], Any], singleton: bool = False
    ) -> None:
        """Register *provider* under *name*; a singleton is built at most once."""
        self._factories[name] = (provider, singleton)

    def resolve(self, name: str) -> Any:
        """Return the instance registered under *name*, building it if needed.

        Raises:
            ValueError: if nothing was registered under *name*.
        """
        # Cached singleton wins outright.
        try:
            return self._cache[name]
        except KeyError:
            pass

        try:
            factory, is_singleton = self._factories[name]
        except KeyError:
            # `from None` keeps the traceback identical to the original's
            # plain raise (no chained KeyError context).
            raise ValueError(f"No provider registered for '{name}'") from None

        instance = factory()
        if is_singleton:
            self._cache[name] = instance
        return instance
126+
127+
128+
# === Main runner ===
def main() -> None:
    """Compose the pipeline through the DI container and execute it once."""
    container = Container()

    # NOTE(review): InMemoryLoader defines load_nl_dataset/load_xacml_dataset
    # but no load(); DataPipeline.run() calls loader.load(), so this wiring
    # raises AttributeError at runtime — confirm the intended loader.
    # A bare class reference is an equivalent zero-arg provider to the
    # original `lambda: Cls()`.
    container.register("loader", InMemoryLoader, singleton=True)
    container.register("transformer", CleanMissingFields)
    container.register("exporter", lambda: JSONExporter("output.json"))

    def build_pipeline() -> DataPipeline:
        # Evaluated lazily, only when "pipeline" itself is resolved.
        return DataPipeline(
            loader=container.resolve("loader"),
            transformer=container.resolve("transformer"),
            exporter=container.resolve("exporter"),
        )

    container.register("pipeline", build_pipeline)

    pipeline: DataPipeline = container.resolve("pipeline")
    pipeline.run()
    print("Pipeline finished. Output written to output.json")


if __name__ == "__main__":
    main()

utils/test.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from pathlib import Path
2+
from workflow import Dataset
3+
from workflow import iter_dir
4+
5+
# from workflow import DatasetLoader
6+
from workflow import DatasetsDict
7+
8+
9+
def main() -> None:
    """Manual smoke test: print each entry iter_dir yields for the litroacp dir.

    NOTE(review): scratch/experimental script; the earlier commented-out
    Dataset experiments were dropped as dead code.
    """
    dataset_dir = Path(__file__).parent.parent / "datasets" / "litroacp"
    for entry in iter_dir(dataset_dir):
        print(entry)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)