diff --git a/content/build/guides/meta.json b/content/build/guides/meta.json
index 5eb8092fa..1d52d2ebe 100644
--- a/content/build/guides/meta.json
+++ b/content/build/guides/meta.json
@@ -9,6 +9,7 @@
"crossmint-nft-minting-app",
"working-with-arns",
"using-turbo-in-a-browser",
- "storing-nfts"
+ "storing-nfts",
+ "verifiable-ai"
]
}
diff --git a/content/build/guides/verifiable-ai/immutable-trust-layer.mdx b/content/build/guides/verifiable-ai/immutable-trust-layer.mdx
new file mode 100644
index 000000000..652d7c3c1
--- /dev/null
+++ b/content/build/guides/verifiable-ai/immutable-trust-layer.mdx
@@ -0,0 +1,47 @@
+---
+title: "The Immutable Trust Layer"
+description: "Implement a Lambda Architecture for AI logging that creates tamper-proof audit trails for algorithmic liability"
+---
+
+import { Callout } from "fumadocs-ui/components/callout";
+import { Steps, Step } from "fumadocs-ui/components/steps";
+
+## Output and Liability Verification for AI Systems
+
+Learn how to implement a Lambda Architecture for AI logging that streams encrypted evidence and builds analytics indices for complete algorithmic accountability.
+
+## Prerequisites
+
+Before starting, ensure you have:
+
+- **Node.js** (v18 or higher)
+- **TypeScript** knowledge
+- **Arweave Wallet (JWK file)** - We recommend [Wander](https://www.wander.app/)
+- **Turbo Credits** - Purchase credits to pay for uploads. See [Turbo Credits guide](/build/upload/turbo-credits)
+- Completed [The Verifiable Dataset](/build/guides/verifiable-ai/verifiable-dataset) guide
+- Completed [The Signed Model Registry](/build/guides/verifiable-ai/signed-model-registry) guide
+
+## Overview
+
+This guide covers:
+
+- Implementing the Speed Layer for real-time evidence streaming
+- Building the Batch Layer with Parquet indices for analytics
+- Creating tamper-proof audit trails
+- Encrypting sensitive AI outputs
+- Querying historical AI decisions
+- Establishing algorithmic liability frameworks
+
+
+<Callout type="info">
+Content for this guide is coming soon. Check back later for the complete walkthrough.
+</Callout>
+
+
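+In the meantime, here is a minimal sketch of the Speed Layer idea described above: encrypt a decision record locally, then stream the ciphertext to Arweave with the Turbo SDK so it becomes part of a permanent audit trail. The record shape, tag names, and the AES-256-GCM choice are illustrative assumptions, not this guide's final API.
+
+```typescript
+import { TurboFactory } from '@ardrive/turbo-sdk';
+import { Readable } from 'stream';
+import * as crypto from 'crypto';
+import * as fs from 'fs';
+
+// Illustrative shape for a single AI decision record
+interface DecisionRecord {
+  modelTxId: string; // points at the signed model registry entry
+  input: unknown;
+  output: unknown;
+  timestamp: number;
+}
+
+// Speed Layer sketch: encrypt the record, then stream it to Arweave
+export async function streamEncryptedEvidence(record: DecisionRecord, key: Buffer) {
+  const jwk = JSON.parse(fs.readFileSync('wallet.json', 'utf-8'));
+  const turbo = TurboFactory.authenticated({ privateKey: jwk, token: 'arweave' });
+
+  // AES-256-GCM keeps sensitive outputs private; `key` must be 32 bytes
+  const iv = crypto.randomBytes(12);
+  const cipher = crypto.createCipheriv('aes-256-gcm', key, iv);
+  const ciphertext = Buffer.concat([cipher.update(JSON.stringify(record)), cipher.final()]);
+  const payload = Buffer.concat([iv, cipher.getAuthTag(), ciphertext]);
+
+  // Upload the ciphertext; only holders of `key` can read the evidence later
+  const upload = await turbo.uploadFile({
+    fileStreamFactory: () => Readable.from(payload),
+    fileSizeFactory: () => payload.byteLength,
+    dataItemOpts: {
+      tags: [
+        { name: 'Content-Type', value: 'application/octet-stream' },
+        { name: 'Type', value: 'AI-Evidence' }, // illustrative tag name
+      ],
+    },
+  });
+
+  return upload.id; // immutable evidence ID for the audit trail
+}
+```
+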
+## Summary
+
+By completing this guide series, you've built a complete verifiable AI infrastructure with:
+
+- **Verifiable Datasets** ensuring input integrity
+- **Signed Model Registry** preventing process drift
+- **Immutable Trust Layer** providing output accountability
+
+Your AI systems now have cryptographic proof of their decisions, creating a foundation for enterprise trust and regulatory compliance.
diff --git a/content/build/guides/verifiable-ai/index.mdx b/content/build/guides/verifiable-ai/index.mdx
new file mode 100644
index 000000000..b0a03421f
--- /dev/null
+++ b/content/build/guides/verifiable-ai/index.mdx
@@ -0,0 +1,52 @@
+---
+title: "Verifiable AI with AR.IO Network"
+description: "Build production-grade verifiable AI systems with immutable data provenance, signed model registries, and tamper-proof audit trails"
+---
+
+import {
+ Database,
+ Shield,
+ FileCheck,
+ Lock,
+} from "lucide-react";
+import { Card, Cards } from "fumadocs-ui/components/card";
+
+## From Black Box to Glass Box: The Verifiable AI Stack
+
+The challenge with Enterprise AI is not just performance, but **provenance**. Standard cloud storage is mutable, making it difficult to prove exactly which dataset trained a model or what precise state an AI agent was in during a specific incident.
+
+To solve the **Algorithmic Liability** problem, AI systems require an immutable root of trust.
+
+AR.IO Network facilitates this by enabling a **"Glass Box"** architecture:
+
+- **Verifiable Datasets**: Prove the integrity of training data, whether it lives on S3 or directly on Arweave.
+- **Signed Model Registries**: Prevent model drift by verifying weights against on-chain proofs before inference starts.
+- **The Trust Layer**: A "Lambda Architecture" for logging that streams encrypted evidence for liability (Speed Layer) and builds Parquet indices for analytics (Batch Layer).
+
+## What You'll Learn
+
+In this guide series, you will build a production-grade **Verifiable AI Stack** using TypeScript and the Turbo SDK.
+
+<Cards>
+  <Card
+    title="The Verifiable Dataset"
+    description="Create tamper-proof datasets with cryptographic proofs, ensuring data integrity from S3 to Arweave."
+    href="/build/guides/verifiable-ai/verifiable-dataset"
+    icon={<Database />}
+  />
+  <Card
+    title="The Signed Model Registry"
+    description="Prevent model drift by verifying weights against on-chain proofs before inference starts."
+    href="/build/guides/verifiable-ai/signed-model-registry"
+    icon={<FileCheck />}
+  />
+  <Card
+    title="The Immutable Trust Layer"
+    description="Implement a Lambda Architecture for AI logging that creates tamper-proof audit trails for algorithmic liability."
+    href="/build/guides/verifiable-ai/immutable-trust-layer"
+    icon={<Shield />}
+  />
+</Cards>
+
+Each guide builds on the last, creating a complete verifiable AI infrastructure by the end of the series.
+
+Let's get started.
diff --git a/content/build/guides/verifiable-ai/meta.json b/content/build/guides/verifiable-ai/meta.json
new file mode 100644
index 000000000..28b5e63c9
--- /dev/null
+++ b/content/build/guides/verifiable-ai/meta.json
@@ -0,0 +1,9 @@
+{
+ "title": "Verifiable AI",
+ "defaultOpen": false,
+ "pages": [
+ "verifiable-dataset",
+ "signed-model-registry",
+ "immutable-trust-layer"
+ ]
+}
diff --git a/content/build/guides/verifiable-ai/signed-model-registry.mdx b/content/build/guides/verifiable-ai/signed-model-registry.mdx
new file mode 100644
index 000000000..9331a6396
--- /dev/null
+++ b/content/build/guides/verifiable-ai/signed-model-registry.mdx
@@ -0,0 +1,39 @@
+---
+title: "The Signed Model Registry"
+description: "Build a registry that prevents model drift by verifying weights against on-chain proofs before inference starts"
+---
+
+import { Callout } from "fumadocs-ui/components/callout";
+import { Steps, Step } from "fumadocs-ui/components/steps";
+
+## Process Verification for AI Models
+
+Learn how to build a signed model registry that prevents model drift by verifying weights against on-chain proofs before inference starts.
+
+## Prerequisites
+
+Before starting, ensure you have:
+
+- **Node.js** (v18 or higher)
+- **TypeScript** knowledge
+- **Arweave Wallet (JWK file)** - We recommend [Wander](https://www.wander.app/)
+- **Turbo Credits** - Purchase credits to pay for uploads. See [Turbo Credits guide](/build/upload/turbo-credits)
+- Completed [The Verifiable Dataset](/build/guides/verifiable-ai/verifiable-dataset) guide
+
+## Overview
+
+This guide covers:
+
+- Creating cryptographic signatures for model weights
+- Storing model metadata on Arweave
+- Verifying model integrity before inference
+- Implementing a model registry service
+- Preventing model drift and tampering
+
+
+<Callout type="info">
+Content for this guide is coming soon. Check back later for the complete walkthrough.
+</Callout>
+
+
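+In the meantime, here is a minimal sketch of the core flow described above: hash the model weights, publish a small registry entry with the Turbo SDK, and re-hash the weights before inference to confirm they match the on-chain entry. The entry schema and tag names are illustrative assumptions, not this guide's final API.
+
+```typescript
+import { TurboFactory } from '@ardrive/turbo-sdk';
+import { Readable } from 'stream';
+import * as crypto from 'crypto';
+import * as fs from 'fs';
+
+// Hash the weights file as a stream so large checkpoints never need to fit in RAM
+async function sha256File(path: string): Promise<string> {
+  const hash = crypto.createHash('sha256');
+  for await (const chunk of fs.createReadStream(path)) hash.update(chunk);
+  return hash.digest('hex');
+}
+
+// Publish a registry entry describing one model version
+export async function registerModel(weightsPath: string, modelName: string, version: string) {
+  const jwk = JSON.parse(fs.readFileSync('wallet.json', 'utf-8'));
+  const turbo = TurboFactory.authenticated({ privateKey: jwk, token: 'arweave' });
+
+  const entry = {
+    type: 'model_registry_entry', // illustrative schema
+    model: modelName,
+    version,
+    sha256: await sha256File(weightsPath),
+    size: fs.statSync(weightsPath).size,
+    timestamp: Date.now(),
+  };
+
+  const payload = Buffer.from(JSON.stringify(entry));
+  const upload = await turbo.uploadFile({
+    fileStreamFactory: () => Readable.from(payload),
+    fileSizeFactory: () => payload.byteLength,
+    dataItemOpts: {
+      tags: [
+        { name: 'Content-Type', value: 'application/json' },
+        { name: 'Type', value: 'Model-Registry-Entry' }, // illustrative tag name
+      ],
+    },
+  });
+
+  return upload.id;
+}
+
+// Before inference: re-hash the local weights and compare with the on-chain entry
+export async function verifyModel(entryTxId: string, weightsPath: string): Promise<boolean> {
+  const entry = await (await fetch(`https://arweave.net/${entryTxId}`)).json();
+  return entry.sha256 === (await sha256File(weightsPath));
+}
+```
+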
+## Next Steps
+
+After completing this guide, proceed to [The Immutable Trust Layer](/build/guides/verifiable-ai/immutable-trust-layer) to learn how to create tamper-proof audit trails.
diff --git a/content/build/guides/verifiable-ai/verifiable-dataset.mdx b/content/build/guides/verifiable-ai/verifiable-dataset.mdx
new file mode 100644
index 000000000..ebe387cc1
--- /dev/null
+++ b/content/build/guides/verifiable-ai/verifiable-dataset.mdx
@@ -0,0 +1,519 @@
+---
+title: "The Verifiable Dataset"
+description: "Create tamper-proof datasets with cryptographic proofs, ensuring data integrity from S3 to Arweave for AI training"
+---
+
+import { Callout } from "fumadocs-ui/components/callout";
+import { Steps, Step } from "fumadocs-ui/components/steps";
+import { Tabs, Tab } from "fumadocs-ui/components/tabs";
+
+Training data is the foundation of any AI model. To ensure provenance, you must be able to prove exactly what data was used to train a specific model version.
+
+We provide two patterns depending on your data size and requirements:
+
+- **The Holographic Anchor**: Best for massive data (TB/PB) stored on S3.
+- **The Native Data Lake**: Best for high-value data (under 1TB) stored directly on Arweave with a Parquet index.
+
+## Prerequisites
+
+Before starting, ensure you have:
+
+- **Node.js** (v18 or higher)
+- **Arweave Wallet (JWK file)** - We recommend [Wander](https://www.wander.app/)
+- **Turbo Credits** - Purchase credits to pay for uploads. See [Turbo Credits guide](/build/upload/turbo-credits)
+- **TypeScript** knowledge
+
+### Install Dependencies
+
+```bash
+npm install @ardrive/turbo-sdk parquetjs @ar.io/wayfinder-core @ar.io/sdk
+npm install --save-dev @types/node
+```
+
+## Pattern A: The Holographic Anchor (Off-Chain)
+
+Use this when your dataset is petabyte-scale or must reside in a specific jurisdiction (for example, to meet GDPR requirements).
+
+With this pattern, we do not upload the actual file. We upload a **cryptographic fingerprint**.
+
+
+
+### Generate Cryptographic Proof
+
+Create a file `anchor-dataset.ts` to generate a SHA-256 hash of your dataset:
+
+```typescript
+import { TurboFactory } from '@ardrive/turbo-sdk';
+import * as fs from 'fs';
+import * as crypto from 'crypto';
+import { Readable } from 'stream';
+
+export async function createHolographicAnchor(filePath: string, s3Url: string) {
+ // Setup Turbo client
+ const jwk = JSON.parse(fs.readFileSync('wallet.json', 'utf-8'));
+ const turbo = TurboFactory.authenticated({
+ privateKey: jwk,
+ token: 'arweave'
+ });
+
+ console.log("1. Generating Cryptographic Proof...");
+
+ // Hash stream (Efficient for large files, low RAM usage)
+ const hash = crypto.createHash('sha256');
+ const fileStream = fs.createReadStream(filePath);
+
+ // Stream the file through the hash
+ for await (const chunk of fileStream) {
+ hash.update(chunk);
+ }
+
+ const fingerprint = hash.digest('hex');
+
+ // 2. Prepare the Anchor Payload
+ const anchor = {
+ type: 'dataset_anchor',
+ storage: 's3',
+ url: s3Url,
+ sha256: fingerprint, // The mathematical truth
+ size: fs.statSync(filePath).size,
+ timestamp: Date.now()
+ };
+
+  // 3. Upload Metadata Only (serialize once, stream the bytes)
+  const payload = Buffer.from(JSON.stringify(anchor));
+  const upload = await turbo.uploadFile({
+    fileStreamFactory: () => Readable.from(payload),
+    fileSizeFactory: () => payload.byteLength,
+ dataItemOpts: {
+ tags: [
+ { name: 'Content-Type', value: 'application/json' },
+ { name: 'Type', value: 'Dataset-Anchor' }
+ ]
+ }
+ });
+
+ console.log(`⚓ Holographic Anchor Minted: ar://${upload.id}`);
+ return upload.id;
+}
+```
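+
+A quick usage sketch (the local path and S3 URL are hypothetical):
+
+```typescript
+import { createHolographicAnchor } from './anchor-dataset';
+
+// Anchor a dataset that stays on S3; only its fingerprint is written to Arweave
+const anchorId = await createHolographicAnchor(
+  './exports/training-data.tar',       // hypothetical local copy used for hashing
+  's3://my-bucket/training-data.tar'   // hypothetical S3 location recorded in the anchor
+);
+console.log(`Anchor ID: ${anchorId}`);
+```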
+
+<Callout type="info">
+This approach is memory-efficient for large files: hashing the stream means you can verify petabyte-scale datasets without loading them entirely into RAM.
+</Callout>
+
+
+
+### Addressing data with ArNS
+
+Instead of hardcoding anchor IDs, use ArNS to create a stable reference that always points to the latest anchor version and maintains a permanent version history.
+
+```typescript
+import { ANT, ARIO, ArweaveSigner } from '@ar.io/sdk';
+import * as fs from 'fs';
+
+async function associateAnchorWithArNS(anchorId: string, arnsName: string, version: string) {
+ // 1. Get the ANT contract for your ArNS name
+ // (Assumes you've already purchased/leased the ArNS name via https://arns.app)
+ const ario = ARIO.mainnet();
+ const records = await ario.getArNSRecord({ name: arnsName });
+
+ if (!records) {
+ throw new Error(`ArNS name "${arnsName}" not found. Purchase it at https://arns.app first.`);
+ }
+
+  // 2. Connect to the ANT contract, signing with your Arweave wallet
+  const jwk = JSON.parse(fs.readFileSync('wallet.json', 'utf-8'));
+  const ant = ANT.init({
+    processId: records.processId,
+    signer: new ArweaveSigner(jwk),
+  });
+
+ // 3. Set the @ record to point to latest version
+ await ant.setRecord({
+ undername: '@',
+ transactionId: anchorId,
+ ttlSeconds: 3600, // 1 hour cache
+ });
+
+ // 4. Set a versioned undername to permanently reference this version
+ await ant.setRecord({
+ undername: version, // e.g., 'v1', 'v2', '2024-12'
+ transactionId: anchorId,
+ ttlSeconds: 3600,
+ });
+
+ console.log(` Latest: ar://${arnsName} → ${anchorId}`);
+ console.log(` Version: ar://${version}_${arnsName} → ${anchorId}`);
+
+ return anchorId;
+}
+```
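+
+A quick usage sketch (the ArNS name and placeholder transaction ID are hypothetical):
+
+```typescript
+// Assumes associateAnchorWithArNS (above) is in scope
+const anchorId = 'REPLACE_WITH_ANCHOR_TX_ID'; // returned by createHolographicAnchor()
+await associateAnchorWithArNS(anchorId, 'dataset-anchor', 'v1'); // 'dataset-anchor' is a hypothetical name you own
+
+// Latest version:  ar://dataset-anchor
+// Pinned version:  ar://v1_dataset-anchor
+```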
+
+
+
+
+### Verify the Dataset
+
+The anchor transaction ID serves as an immutable proof that:
+1. A specific dataset existed at a specific time
+2. The dataset had a specific SHA-256 hash
+3. The dataset was stored at a specific S3 URL
+
+Anyone can verify the dataset hasn't changed by re-hashing the S3 file and comparing it to the on-chain fingerprint.
+
+<Tabs items={['Direct Gateway', 'Wayfinder']}>
+<Tab value="Direct Gateway">
+
+```typescript
+import * as crypto from 'crypto';
+import * as fs from 'fs';
+
+// Simple approach - faster to implement but single point of failure
+// Query the latest version or a specific version with ArNS
+async function verifyDataset(identifier: string, localFilePath: string) {
+ // 1. Fetch the anchor from Arweave
+ // Use ArNS name for latest version: ar://dataset-anchor
+ // Use versioned undername for specific version: ar://v1_dataset-anchor
+ const anchorData = await fetch(`https://arweave.net/${identifier}`);
+ const anchor = await anchorData.json();
+
+ // 2. Hash the local file
+ const hash = crypto.createHash('sha256');
+ const fileStream = fs.createReadStream(localFilePath);
+
+ for await (const chunk of fileStream) {
+ hash.update(chunk);
+ }
+
+ const localFingerprint = hash.digest('hex');
+
+ // 3. Compare
+ if (localFingerprint === anchor.sha256) {
+ console.log('✅ Dataset verified! Matches on-chain anchor.');
+ return true;
+ } else {
+ console.log('❌ Dataset verification failed! File has been modified.');
+ return false;
+ }
+}
+```
+
+</Tab>
+<Tab value="Wayfinder">
+
+```typescript
+import { createWayfinderClient, PreferredWithFallbackRoutingStrategy, FastestPingRoutingStrategy, NetworkGatewaysProvider } from '@ar.io/wayfinder-core';
+import { ARIO } from '@ar.io/sdk';
+import * as crypto from 'crypto';
+import * as fs from 'fs';
+
+// Production approach - preferred gateway with network fallback for resilience
+// Query the latest version or a specific version with ArNS
+async function verifyDataset(identifier: string, localFilePath: string) {
+ // 1. Setup Wayfinder: tries arweave.net first, falls back to top 10 staked gateways
+ const wayfinder = createWayfinderClient({
+ ario: ARIO.mainnet(),
+ routingStrategy: new PreferredWithFallbackRoutingStrategy({
+ preferredGateway: 'https://arweave.net',
+ fallbackStrategy: new FastestPingRoutingStrategy({
+ timeoutMs: 1000,
+ gatewaysProvider: new NetworkGatewaysProvider({
+ ario: ARIO.mainnet(),
+ sortBy: 'operatorStake',
+ limit: 10,
+ }),
+ }),
+ }),
+ });
+
+ // 2. Fetch the anchor from Arweave via Wayfinder
+ // Use ArNS name for latest version: ar://dataset-anchor
+ // Use versioned undername for specific version: ar://v1_dataset-anchor
+ const anchorData = await wayfinder.request(`ar://${identifier}`);
+ const anchor = await anchorData.json();
+
+ // 3. Hash the local file
+ const hash = crypto.createHash('sha256');
+ const fileStream = fs.createReadStream(localFilePath);
+
+ for await (const chunk of fileStream) {
+ hash.update(chunk);
+ }
+
+ const localFingerprint = hash.digest('hex');
+
+ // 4. Compare
+ if (localFingerprint === anchor.sha256) {
+ console.log('✅ Dataset verified! Matches on-chain anchor.');
+ return true;
+ } else {
+ console.log('❌ Dataset verification failed! File has been modified.');
+ return false;
+ }
+}
+```
+</Tab>
+</Tabs>
+
+
+
+<Callout type="warn">
+**Important**: The Holographic Anchor proves a dataset existed with a specific hash, but doesn't make the data itself permanent. For true permanence, use Pattern B.
+</Callout>
+
+## Pattern B: The Native Data Lake (On-Chain)
+
+Use this for fine-tuning sets, RAG Knowledge Bases, or benchmarks where you want both the data and its index permanently stored.
+
+We upload the raw files to Arweave and generate a Parquet Index. This allows training scripts to filter data (e.g., "give me only the train split images") by reading the small index instead of downloading the entire dataset.
+
+
+
+### Upload Files and Build Index
+
+Create a file `upload-native-lake.ts`:
+
+```typescript
+import { TurboFactory } from '@ardrive/turbo-sdk';
+import * as parquet from 'parquetjs';
+import * as fs from 'fs';
+import * as path from 'path';
+
+// Schema: index NOT just the transaction ID, but the content metadata too
+const schema = new parquet.ParquetSchema({
+ filename: { type: 'UTF8' },
+ tx_id: { type: 'UTF8' }, // The Arweave Pointer
+ byte_size: { type: 'INT64' },
+ dataset_split: { type: 'UTF8' }, // 'train' vs 'test'
+ label: { type: 'UTF8' } // e.g. 'pneumonia'
+});
+
+export async function uploadDatasetWithIndex(baseDir: string) {
+ const jwk = JSON.parse(fs.readFileSync('wallet.json', 'utf-8'));
+ const turbo = TurboFactory.authenticated({
+ privateKey: jwk,
+ token: 'arweave'
+ });
+
+ const indexRows = [];
+ const files = fs.readdirSync(baseDir);
+
+ console.log(`🚀 Processing ${files.length} files...`);
+
+ // 1. Upload Files
+ for (const file of files) {
+ const filePath = path.join(baseDir, file);
+ const size = fs.statSync(filePath).size;
+
+ // Example logic to determine label/split from filename - customize for your dataset
+ const isTrain = file.startsWith('train');
+ const label = file.includes('cat') ? 'cat' : 'dog';
+
+ const upload = await turbo.uploadFile({
+ fileStreamFactory: () => fs.createReadStream(filePath),
+ fileSizeFactory: () => size,
+ dataItemOpts: { tags: [{ name: 'Content-Type', value: 'image/jpeg' }] }
+ });
+
+ // Add to Index (Don't just list it, describe it)
+ indexRows.push({
+ filename: file,
+ tx_id: upload.id,
+ byte_size: size,
+ dataset_split: isTrain ? 'train' : 'test',
+ label: label
+ });
+
+ console.log(` ✓ Uploaded: ${file}`);
+ }
+
+ // 2. Write Parquet Index
+ const indexFile = 'dataset_manifest.parquet';
+ const writer = await parquet.ParquetWriter.openFile(schema, indexFile);
+ for (const row of indexRows) await writer.appendRow(row);
+ await writer.close();
+
+ // 3. Upload the Index
+ const manifestUpload = await turbo.uploadFile({
+ fileStreamFactory: () => fs.createReadStream(indexFile),
+ fileSizeFactory: () => fs.statSync(indexFile).size,
+ dataItemOpts: {
+ tags: [
+ { name: 'Type', value: 'Dataset-Parquet-Manifest' },
+ { name: 'Content-Type', value: 'application/octet-stream' }
+ ]
+ }
+ });
+
+ console.log(`\n🎉 Data Lake Created!`);
+ console.log(`👉 Index ID: ar://${manifestUpload.id}`);
+
+ return manifestUpload.id;
+}
+```
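+
+A quick usage sketch (the directory name is hypothetical):
+
+```typescript
+import { uploadDatasetWithIndex } from './upload-native-lake';
+
+// Uploads every file in the folder, then uploads the Parquet manifest describing them
+const manifestId = await uploadDatasetWithIndex('./datasets/pets'); // hypothetical folder of train*/test* images
+console.log(`Query this index at ar://${manifestId}`);
+```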
+
+
+
+### Addressing data with ArNS
+
+Instead of hardcoding manifest IDs, use ArNS to create a stable reference that always points to the latest dataset version and maintains a permanent version history.
+
+```typescript
+import { ANT, ARIO, ArweaveSigner } from '@ar.io/sdk';
+import * as fs from 'fs';
+
+async function associateDatasetWithArNS(manifestId: string, arnsName: string, version: string) {
+ // 1. Get the ANT contract for your ArNS name
+ // (Assumes you've already purchased/leased the ArNS name via https://arns.app)
+ const ario = ARIO.mainnet();
+ const records = await ario.getArNSRecord({ name: arnsName });
+
+ if (!records) {
+ throw new Error(`ArNS name "${arnsName}" not found. Purchase it at https://arns.app first.`);
+ }
+
+  // 2. Connect to the ANT contract, signing with your Arweave wallet
+  const jwk = JSON.parse(fs.readFileSync('wallet.json', 'utf-8'));
+  const ant = ANT.init({
+    processId: records.processId,
+    signer: new ArweaveSigner(jwk),
+  });
+
+ // 3. Set the @ record to point to latest version
+ await ant.setRecord({
+ undername: '@',
+ transactionId: manifestId,
+ ttlSeconds: 3600, // 1 hour cache
+ });
+
+ // 4. Set a versioned undername to permanently reference this version
+ await ant.setRecord({
+ undername: version, // e.g., 'v1', 'v2', '2024-12'
+ transactionId: manifestId,
+ ttlSeconds: 3600,
+ });
+
+ console.log(` Latest: ar://${arnsName} → ${manifestId}`);
+ console.log(` Version: ar://${version}_${arnsName} → ${manifestId}`);
+
+ return manifestId;
+}
+```
+
+
+
+
+### Query the Index
+
+Training scripts can now query the Parquet index to fetch specific subsets:
+
+<Tabs items={['Direct Gateway', 'Wayfinder']}>
+<Tab value="Direct Gateway">
+
+```typescript
+import * as parquet from 'parquetjs';
+
+// Simple approach - faster to implement but single point of failure
+// Query the latest version or a specific version with ArNS
+async function getTrainingImages(identifier: string) {
+ // 1. Download the Parquet index
+ // Use ArNS name for latest version: ar://medical-imaging
+ // Use versioned undername for specific version: ar://v1_medical-imaging
+ const indexData = await fetch(`https://arweave.net/${identifier}`);
+ const buffer = await indexData.arrayBuffer();
+
+ // 2. Query for training split
+ const reader = await parquet.ParquetReader.openBuffer(Buffer.from(buffer));
+ const cursor = reader.getCursor();
+
+ const trainingImages = [];
+ let record = null;
+
+ while (record = await cursor.next()) {
+ if (record.dataset_split === 'train') {
+ trainingImages.push({
+ url: `ar://${record.tx_id}`,
+ label: record.label,
+ size: record.byte_size
+ });
+ }
+ }
+
+ await reader.close();
+ return trainingImages;
+}
+```
+
+</Tab>
+<Tab value="Wayfinder">
+
+```typescript
+import * as parquet from 'parquetjs';
+import { createWayfinderClient, PreferredWithFallbackRoutingStrategy, FastestPingRoutingStrategy, NetworkGatewaysProvider } from '@ar.io/wayfinder-core';
+import { ARIO } from '@ar.io/sdk';
+
+// Production approach - keeps your training pipeline operational even during gateway outages
+// Query the latest version or a specific version with ArNS
+async function getTrainingImages(identifier: string) {
+ // 1. Setup Wayfinder: tries arweave.net first, falls back to top 10 staked gateways
+ const wayfinder = createWayfinderClient({
+ ario: ARIO.mainnet(),
+ routingStrategy: new PreferredWithFallbackRoutingStrategy({
+ preferredGateway: 'https://arweave.net',
+ fallbackStrategy: new FastestPingRoutingStrategy({
+ timeoutMs: 1000,
+ gatewaysProvider: new NetworkGatewaysProvider({
+ ario: ARIO.mainnet(),
+ sortBy: 'operatorStake',
+ limit: 10,
+ }),
+ }),
+ }),
+ });
+
+ // 2. Download the Parquet index
+ // Use ArNS name for latest version: ar://medical-imaging
+ // Use versioned undername for specific version: ar://v1_medical-imaging
+ const indexData = await wayfinder.request(`ar://${identifier}`);
+ const buffer = await indexData.arrayBuffer();
+
+ // 3. Query for training split
+ const reader = await parquet.ParquetReader.openBuffer(Buffer.from(buffer));
+ const cursor = reader.getCursor();
+
+ const trainingImages = [];
+ let record = null;
+
+ while (record = await cursor.next()) {
+ if (record.dataset_split === 'train') {
+ trainingImages.push({
+ url: `ar://${record.tx_id}`,
+ label: record.label,
+ size: record.byte_size
+ });
+ }
+ }
+
+ await reader.close();
+ return trainingImages;
+}
+```
+</Tab>
+</Tabs>
+
+<Callout type="info">
+**Performance Tip**: The Parquet format allows efficient columnar queries, meaning you can filter millions of records without loading the entire dataset into memory.
+</Callout>
+
+
+
+## Summary
+
+You now have two patterns for creating verifiable datasets:
+
+1. **Holographic Anchor**: For massive datasets that must stay on S3, create an immutable cryptographic fingerprint on Arweave.
+2. **Native Data Lake**: For smaller, high-value datasets, store both the data and a queryable Parquet index permanently on Arweave.
+
+Both patterns provide cryptographic proof of exactly what data was used to train your AI models, solving the provenance problem for Enterprise AI.
+
+## Next Steps
+
+Now that you have verifiable datasets, proceed to [The Signed Model Registry](/build/guides/verifiable-ai/signed-model-registry) to learn how to prevent model drift by verifying weights against on-chain proofs.