A Swift implementation of Q-Learning with Dyna-Q planning, optimized for Apple Silicon using MLX. Perfect for building adaptive iOS apps that learn user preferences over time.
- Tabular Q-Learning - Classic reinforcement learning for discrete state/action spaces
- Dyna-Q Planning - Model-based learning for improved sample efficiency
- MLX-Powered - Leverages Apple's MLX framework for efficient computation
- iOS Ready - Designed for real-world iOS app integration
- macOS 14.0+ / iOS 17.0+
- Apple Silicon (M1/M2/M3/M4)
- Swift 6.2+
Add to your Package.swift:
```swift
dependencies: [
    .package(url: "https://github.com/timothy/QLearnerKit", from: "1.0.0-alpha.1")
]
```

Or add via Xcode: File → Add Package Dependencies.
```swift
import QLearnerKit

// Create learner for grid world (100 states, 4 actions)
let learner = QLearner(
    numStates: 100,
    numActions: 4,
    alpha: 0.2,
    gamma: 0.9,
    dyna: 200
)

// Initialize with starting state
var action = learner.querysetstate(0)

// Learning loop (`environment` and `done` stand in for your own task loop)
while !done {
    let nextState = environment.step(action)
    let reward = environment.getReward(nextState)
    action = learner.query(nextState, reward: reward)
}

// Get learned policy
let policy = learner.getPolicy()
```

Here's a complete example of using QLearnerKit to learn user content preferences in a news app:
```swift
import Foundation
import QLearnerKit

// ObservableObject conformance lets the recommender drive @StateObject in SwiftUI
class NewsRecommender: ObservableObject {
    private let learner: QLearner
    private var currentState: Int = 0

    enum ContentCategory: Int, CaseIterable {
        case sports = 0
        case technology = 1
        case business = 2
        case entertainment = 3

        var displayName: String {
            switch self {
            case .sports: return "Sports"
            case .technology: return "Technology"
            case .business: return "Business"
            case .entertainment: return "Entertainment"
            }
        }
    }

    init() {
        // 8 states: 4 time periods × 2 day types (weekday/weekend)
        // 4 actions: content categories
        learner = QLearner(
            numStates: 8,
            numActions: ContentCategory.allCases.count,
            alpha: 0.2,   // Learning rate
            gamma: 0.9,   // Future reward discount
            rar: 0.3,     // 30% exploration (lower for production)
            radr: 0.995,  // Slow decay
            dyna: 50      // Model-based planning steps
        )
    }

    /// Determine current user context state
    private func getCurrentState() -> Int {
        let calendar = Calendar.current
        let now = Date()
        let hour = calendar.component(.hour, from: now)
        let isWeekend = calendar.isDateInWeekend(now)

        // Time periods: 0=Morning(6-12), 1=Afternoon(12-18), 2=Evening(18-24), 3=Night(0-6)
        let timeSlot: Int
        switch hour {
        case 6..<12: timeSlot = 0
        case 12..<18: timeSlot = 1
        case 18..<24: timeSlot = 2
        default: timeSlot = 3
        }

        // Combine time and day type: timeSlot * 2 + (weekend ? 1 : 0)
        return timeSlot * 2 + (isWeekend ? 1 : 0)
    }

    /// Get recommended content category for current context
    func recommendContent() -> ContentCategory {
        currentState = getCurrentState()
        let action = learner.querysetstate(currentState)
        return ContentCategory(rawValue: action) ?? .technology
    }

    /// Record user engagement with content
    /// - Parameters:
    ///   - category: The content category shown
    ///   - didRead: Whether user read the article (>30 seconds)
    ///   - readDuration: How long user engaged (seconds)
    func recordEngagement(category: ContentCategory, didRead: Bool, readDuration: TimeInterval) {
        let state = getCurrentState()

        // Reward based on engagement level
        let reward: Float
        if didRead && readDuration > 60 {
            reward = 1.0    // Strong positive - user engaged deeply
        } else if didRead {
            reward = 0.5    // Moderate positive - user read article
        } else if readDuration > 5 {
            reward = 0.1    // Weak positive - user glanced
        } else {
            reward = -0.2   // Negative - user ignored
        }

        _ = learner.query(state, reward: reward)
    }

    /// Get best category for current context (no exploration)
    func getBestCategory() -> ContentCategory {
        let state = getCurrentState()
        let policy = learner.getPolicy()
        return ContentCategory(rawValue: policy[state]) ?? .technology
    }

    /// Export learned preferences for persistence
    func exportPreferences() -> [Float] {
        return learner.getQTable().asArray(Float.self)
    }
}
```

Hooking the recommender up to a SwiftUI view:

```swift
import SwiftUI
struct ContentView: View {
    @StateObject private var recommender = NewsRecommender()
    @State private var recommendedCategory: NewsRecommender.ContentCategory = .technology
    @State private var articleStartTime: Date?

    var body: some View {
        VStack(spacing: 20) {
            Text("Recommended for You")
                .font(.title)

            Text(recommendedCategory.displayName)
                .font(.headline)
                .padding()
                .background(Color.blue.opacity(0.2))
                .cornerRadius(10)

            Button("Show Article") {
                showArticle()
            }
            .buttonStyle(.borderedProminent)

            Button("Get New Recommendation") {
                recommendedCategory = recommender.recommendContent()
            }
        }
        .padding()
        .onAppear {
            recommendedCategory = recommender.recommendContent()
        }
    }

    private func showArticle() {
        articleStartTime = Date()
        // Simulate user reading article
        DispatchQueue.main.asyncAfter(deadline: .now() + 2.0) {
            recordArticleEngagement(didRead: true)
        }
    }

    private func recordArticleEngagement(didRead: Bool) {
        guard let startTime = articleStartTime else { return }
        let duration = Date().timeIntervalSince(startTime)

        recommender.recordEngagement(
            category: recommendedCategory,
            didRead: didRead,
            readDuration: duration
        )

        // Get next recommendation
        recommendedCategory = recommender.recommendContent()
        articleStartTime = nil
    }
}
```

Persisting learned preferences between launches:

```swift
extension NewsRecommender {
    private var preferencesKey: String { "qlearner_preferences" }

    func save() {
        let preferences = exportPreferences()
        UserDefaults.standard.set(preferences, forKey: preferencesKey)
    }

    func load() {
        // Note: You'd need to implement a method to load the Q-table back.
        // This requires adding a setter or an initializer that accepts Q-table data.
        if let saved = UserDefaults.standard.array(forKey: preferencesKey) as? [Float] {
            // Restore Q-table (implementation depends on your needs)
            print("Loaded \(saved.count) preference values")
        }
    }
}
```

The QLearner initializer and its defaults:

```swift
QLearner(
    numStates: Int = 100,     // Number of discrete states
    numActions: Int = 4,      // Number of available actions
    alpha: Float = 0.2,       // Learning rate [0.0-1.0]
    gamma: Float = 0.9,       // Discount factor [0.0-1.0]
    rar: Float = 0.5,         // Random action rate (epsilon)
    radr: Float = 0.99,       // Exploration decay rate
    dyna: Int = 0,            // Planning steps (0 = disabled)
    verbose: Bool = false     // Debug output
)
```

`querysetstate(_ s: Int)`
Initialize state without learning. Use at episode start.
- Parameters:
  - `s`: Initial state
- Returns: Selected action

`query(_ sPrime: Int, reward: Float)`
Main learning step. Updates the Q-table and selects the next action.
- Parameters:
  - `sPrime`: New state after the previous action
  - `reward`: Reward received
- Returns: Next action to take

`getPolicy()`
Extract the greedy policy (best action per state).
- Returns: Array of actions indexed by state

`getQTable()`
Get the Q-table for inspection or persistence.
- Returns: Q-table as `MLXArray`
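For readers curious what the `dyna` planning steps actually do, here is a minimal, framework-free sketch of tabular Q-learning with Dyna-Q planning. It is a conceptual illustration under simplifying assumptions (deterministic model, plain Swift arrays), not QLearnerKit's MLX implementation:

```swift
// Conceptual sketch only: plain-Swift tabular Q-learning with Dyna-Q planning.
// QLearnerKit's actual implementation runs on MLX; the names here are illustrative.
struct TabularDynaQ {
    var q: [[Double]]                                            // Q[state][action]
    var model: [Int: [Int: (next: Int, reward: Double)]] = [:]   // remembered transitions
    let alpha: Double                                            // learning rate
    let gamma: Double                                            // discount factor
    let planningSteps: Int                                       // the "dyna" parameter

    init(numStates: Int, numActions: Int,
         alpha: Double = 0.2, gamma: Double = 0.9, planningSteps: Int = 50) {
        q = Array(repeating: Array(repeating: 0.0, count: numActions), count: numStates)
        self.alpha = alpha
        self.gamma = gamma
        self.planningSteps = planningSteps
    }

    mutating func update(s: Int, a: Int, reward: Double, sPrime: Int) {
        // 1. Direct Q-learning update from the real experience
        q[s][a] += alpha * (reward + gamma * q[sPrime].max()! - q[s][a])

        // 2. Record the transition in a simple deterministic model
        model[s, default: [:]][a] = (next: sPrime, reward: reward)

        // 3. Dyna-Q planning: replay randomly sampled remembered transitions
        for _ in 0..<planningSteps {
            guard let (ps, actions) = model.randomElement(),
                  let (pa, outcome) = actions.randomElement() else { break }
            q[ps][pa] += alpha * (outcome.reward + gamma * q[outcome.next].max()! - q[ps][pa])
        }
    }
}
```

Setting `planningSteps` to 0 disables step 3 entirely, which mirrors the library's `dyna: 0` default.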
- Content Recommendation - Learn user preferences for news, videos, products
- UI Personalization - Adapt interface layouts based on user behavior
- Feature Discovery - Guide users to relevant app features
- Notification Timing - Learn optimal times to send notifications
- A/B Testing - Multi-armed bandit for dynamic feature testing
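For the A/B testing case, a learner with a single state behaves like an epsilon-greedy multi-armed bandit. A minimal sketch using the initializer documented above; the variant names and reward values are illustrative assumptions:

```swift
import QLearnerKit

// Hypothetical onboarding variants to test; one bandit "arm" per variant.
enum OnboardingVariant: Int, CaseIterable {
    case short = 0
    case detailed = 1
    case video = 2
}

// A single state reduces the Q-learner to an epsilon-greedy bandit.
let bandit = QLearner(
    numStates: 1,
    numActions: OnboardingVariant.allCases.count,
    alpha: 0.1,
    gamma: 0.0,    // no successor state to discount in a one-state problem
    rar: 0.2,      // 20% exploration
    radr: 0.999,
    dyna: 0
)

// Pick a variant to show this user.
let arm = bandit.querysetstate(0)
let variant = OnboardingVariant(rawValue: arm) ?? .short
print("Showing onboarding variant: \(variant)")

// Later, report the outcome (illustrative signal: did the user finish onboarding?).
let userCompletedOnboarding = true   // e.g. from your analytics layer
_ = bandit.query(0, reward: userCompletedOnboarding ? 1.0 : -0.2)
```

Since there is only one state, `getPolicy()[0]` reports the variant the learner currently considers best.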
- Keep state space small (< 1000 states for fast learning)
- Use meaningful features (time, context, user segments)
- Combine continuous features into discrete bins (see the helpers after this list)
- Use clear positive/negative signals
- Scale rewards consistently (-1 to +1)
- Reward desired behaviors immediately
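Two small helpers illustrate these points; the bin edges and reward thresholds below are illustrative, not part of QLearnerKit:

```swift
// Bucket a continuous feature (session length in minutes) into discrete bins
// so it can be folded into a compact state index.
func sessionLengthBin(minutes: Double) -> Int {
    switch minutes {
    case ..<1.0:      return 0   // bounce
    case 1.0..<5.0:   return 1   // short visit
    case 5.0..<20.0:  return 2   // engaged
    default:          return 3   // deep session
    }
}

// Combine the bin with another discrete feature into a single state index,
// mirroring the timeSlot * 2 + weekend pattern from the news example.
func stateIndex(minutes: Double, isWeekend: Bool) -> Int {
    sessionLengthBin(minutes: minutes) * 2 + (isWeekend ? 1 : 0)
}

// Map raw engagement onto a consistent -1...+1 reward scale,
// capped so dwell time never outweighs an explicit conversion.
func engagementReward(secondsViewed: Double, converted: Bool) -> Float {
    if converted { return 1.0 }
    if secondsViewed < 2 { return -0.2 }      // effectively ignored
    return Float(min(secondsViewed / 120.0, 0.8))
}
```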
- Production: `rar: 0.1-0.3` (low exploration)
- Learning: `rar: 0.5-0.7` (high exploration)
- Alpha: `0.1-0.3` for stable environments
- Dyna: `50-200` for sample efficiency
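As a rough starting point, these ranges might translate into configurations like the following (the exact values are placeholders within the ranges above; tune for your own app):

```swift
import QLearnerKit

// Exploration-heavy settings while preferences are still being learned
let trainingLearner = QLearner(
    numStates: 8,
    numActions: 4,
    alpha: 0.2,
    gamma: 0.9,
    rar: 0.6,      // high exploration
    radr: 0.99,
    dyna: 100      // planning steps for sample efficiency
)

// Conservative settings once behavior has stabilized in production
let productionLearner = QLearner(
    numStates: 8,
    numActions: 4,
    alpha: 0.1,    // stable environment
    gamma: 0.9,
    rar: 0.2,      // low exploration
    radr: 0.995,
    dyna: 50
)
```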
- All learning happens on-device
- No user data sent to servers
- Q-table stored locally
- Lightweight: ~100KB memory for 100 states × 10 actions
- Fast updates: < 1ms per query on Apple Silicon
- Efficient planning: Dyna-Q improves sample efficiency by roughly 5-10×
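To check these numbers on your own hardware, a rough micro-benchmark along these lines can help (the timing approach and sizes are illustrative, mirroring the 100 states × 10 actions figure above):

```swift
import QLearnerKit

// Rough micro-benchmark of query() latency on a 100-state, 10-action learner.
let learner = QLearner(numStates: 100, numActions: 10)
_ = learner.querysetstate(0)

let iterations = 10_000
let elapsed = ContinuousClock().measure {
    for i in 0..<iterations {
        _ = learner.query(i % 100, reward: Float.random(in: -1...1))
    }
}
print("Average per query: \(elapsed / iterations)")
```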
License: Apache-2.0

Author: Timothy Bradford

Contributions welcome! Please open an issue or PR.