# QLearnerKit

A Swift implementation of Q-Learning with Dyna-Q planning, optimized for Apple Silicon using MLX. Perfect for building adaptive iOS apps that learn user preferences over time.

## Features

- **Tabular Q-Learning** - Classic reinforcement learning for discrete state/action spaces
- **Dyna-Q Planning** - Model-based learning for improved sample efficiency
- **MLX-Powered** - Leverages Apple's MLX framework for efficient computation
- **iOS Ready** - Designed for real-world iOS app integration

## Requirements

- macOS 14.0+ / iOS 17.0+
- Apple Silicon (M1/M2/M3/M4)
- Swift 6.2+

## Installation

### Swift Package Manager

Add to your `Package.swift`:

```swift
dependencies: [
    .package(url: "https://github.com/timothy/QLearnerKit", from: "1.0.0")
]
```
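
Then add the library to your target's dependencies. A quick sketch, assuming the product name matches the package name:

```swift
.target(
    name: "YourApp",
    dependencies: [
        .product(name: "QLearnerKit", package: "QLearnerKit")
    ]
)
```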

Or add via Xcode: **File → Add Package Dependencies**

## Quick Start

### Basic Q-Learning

```swift
import QLearnerKit

// Create a learner for a grid world (100 states, 4 actions)
let learner = QLearner(
    numStates: 100,
    numActions: 4,
    alpha: 0.2,
    gamma: 0.9,
    dyna: 200
)

// Initialize with the starting state (no learning update on this call)
var action = learner.querysetstate(0)

// Learning loop; `environment` and `done` stand in for your own
// simulator and its terminal-state flag
while !done {
    let nextState = environment.step(action)
    let reward = environment.getReward(nextState)
    action = learner.query(nextState, reward: reward)
}

// Extract the learned greedy policy
let policy = learner.getPolicy()
```
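
For episodic tasks, call `querysetstate` at the start of each episode and `query` on every step. A minimal training-loop sketch, assuming a hypothetical `Environment` protocol (not part of QLearnerKit):

```swift
import QLearnerKit

// Hypothetical environment interface, for illustration only
protocol Environment {
    mutating func reset() -> Int  // returns the initial state
    mutating func step(_ action: Int) -> (state: Int, reward: Float, done: Bool)
}

func train<E: Environment>(learner: QLearner, environment: inout E, episodes: Int) {
    for _ in 0..<episodes {
        // New episode: set the state without a learning update
        var action = learner.querysetstate(environment.reset())
        var done = false
        while !done {
            let result = environment.step(action)
            // Updates the Q-value for the previous (state, action) pair
            action = learner.query(result.state, reward: result.reward)
            done = result.done
        }
    }
}
```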

## iOS User Preference Learning

Here's a complete example of using QLearnerKit to learn user content preferences in a news app:

### Setup

```swift
import Foundation
import QLearnerKit

// ObservableObject conformance so the class works with @StateObject in SwiftUI below
class NewsRecommender: ObservableObject {
    private let learner: QLearner
    private var currentState: Int = 0

    enum ContentCategory: Int, CaseIterable {
        case sports = 0
        case technology = 1
        case business = 2
        case entertainment = 3

        var displayName: String {
            switch self {
            case .sports: return "Sports"
            case .technology: return "Technology"
            case .business: return "Business"
            case .entertainment: return "Entertainment"
            }
        }
    }

    init() {
        // 8 states: 4 time periods × 2 day types (weekday/weekend)
        // 4 actions: content categories
        learner = QLearner(
            numStates: 8,
            numActions: ContentCategory.allCases.count,
            alpha: 0.2,    // Learning rate
            gamma: 0.9,    // Future reward discount
            rar: 0.3,      // 30% exploration (lower for production)
            radr: 0.995,   // Slow decay
            dyna: 50       // Model-based planning steps
        )
    }

    /// Determine current user context state
    private func getCurrentState() -> Int {
        let calendar = Calendar.current
        let now = Date()

        let hour = calendar.component(.hour, from: now)
        let isWeekend = calendar.isDateInWeekend(now)

        // Time periods: 0=Morning(6-12), 1=Afternoon(12-18), 2=Evening(18-24), 3=Night(0-6)
        let timeSlot: Int
        switch hour {
        case 6..<12: timeSlot = 0
        case 12..<18: timeSlot = 1
        case 18..<24: timeSlot = 2
        default: timeSlot = 3
        }

        // Combine time and day type: timeSlot * 2 + (weekend ? 1 : 0)
        return timeSlot * 2 + (isWeekend ? 1 : 0)
    }

    /// Get recommended content category for current context
    func recommendContent() -> ContentCategory {
        currentState = getCurrentState()
        let action = learner.querysetstate(currentState)
        return ContentCategory(rawValue: action) ?? .technology
    }

    /// Record user engagement with content
    /// - Parameters:
    ///   - category: The content category shown
    ///   - didRead: Whether user read the article (>30 seconds)
    ///   - readDuration: How long user engaged (seconds)
    func recordEngagement(category: ContentCategory, didRead: Bool, readDuration: TimeInterval) {
        // The learner tracks the last (state, action) pair set in
        // recommendContent(), so the update below applies to the category
        // it last recommended; `category` is kept for logging/clarity.
        let state = getCurrentState()

        // Reward based on engagement level
        let reward: Float
        if didRead && readDuration > 60 {
            reward = 1.0   // Strong positive - user engaged deeply
        } else if didRead {
            reward = 0.5   // Moderate positive - user read the article
        } else if readDuration > 5 {
            reward = 0.1   // Weak positive - user glanced
        } else {
            reward = -0.2  // Negative - user ignored
        }

        _ = learner.query(state, reward: reward)
    }

    /// Get best category for current context (no exploration)
    func getBestCategory() -> ContentCategory {
        let state = getCurrentState()
        let policy = learner.getPolicy()
        return ContentCategory(rawValue: policy[state]) ?? .technology
    }

    /// Export learned preferences for persistence
    func exportPreferences() -> [Float] {
        return learner.getQTable().asArray(Float.self)
    }
}
```

### Usage in SwiftUI

```swift
import SwiftUI

struct ContentView: View {
    @StateObject private var recommender = NewsRecommender()
    @State private var recommendedCategory: NewsRecommender.ContentCategory = .technology
    @State private var articleStartTime: Date?

    var body: some View {
        VStack(spacing: 20) {
            Text("Recommended for You")
                .font(.title)

            Text(recommendedCategory.displayName)
                .font(.headline)
                .padding()
                .background(Color.blue.opacity(0.2))
                .cornerRadius(10)

            Button("Show Article") {
                showArticle()
            }
            .buttonStyle(.borderedProminent)

            Button("Get New Recommendation") {
                recommendedCategory = recommender.recommendContent()
            }
        }
        .padding()
        .onAppear {
            recommendedCategory = recommender.recommendContent()
        }
    }

    private func showArticle() {
        articleStartTime = Date()

        // Simulate the user reading the article for a couple of seconds
        DispatchQueue.main.asyncAfter(deadline: .now() + 2.0) {
            recordArticleEngagement(didRead: true)
        }
    }

    private func recordArticleEngagement(didRead: Bool) {
        guard let startTime = articleStartTime else { return }
        let duration = Date().timeIntervalSince(startTime)

        recommender.recordEngagement(
            category: recommendedCategory,
            didRead: didRead,
            readDuration: duration
        )

        // Get next recommendation
        recommendedCategory = recommender.recommendContent()
        articleStartTime = nil
    }
}
```

### Persistence Example

```swift
extension NewsRecommender {
    private var preferencesKey: String { "qlearner_preferences" }

    func save() {
        let preferences = exportPreferences()
        UserDefaults.standard.set(preferences, forKey: preferencesKey)
    }

    func load() {
        // Note: restoring requires a way to write the Q-table back into the
        // learner (a setter or an initializer that accepts Q-table data);
        // see the sketch below
        if let saved = UserDefaults.standard.array(forKey: preferencesKey) as? [Float] {
            print("Loaded \(saved.count) preference values")
        }
    }
}
```
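
QLearnerKit doesn't currently expose a way to write the Q-table back. The sketch below assumes a hypothetical `setQTable(_:)` method added to `QLearner`, and that this extension lives in the same file as `NewsRecommender` (so it can reach the private `learner` property):

```swift
import MLX

extension NewsRecommender {
    func restore() {
        guard let saved = UserDefaults.standard.array(forKey: preferencesKey) as? [Float] else {
            return
        }
        // Must match the 8 states × 4 actions used in init()
        let numStates = 8
        let numActions = ContentCategory.allCases.count
        guard saved.count == numStates * numActions else { return }

        let qTable = MLXArray(saved, [numStates, numActions])
        learner.setQTable(qTable)  // hypothetical setter, not in the current API
    }
}
```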

## API Reference

### Initialization

```swift
QLearner(
    numStates: Int = 100,   // Number of discrete states
    numActions: Int = 4,    // Number of available actions
    alpha: Float = 0.2,     // Learning rate [0.0-1.0]
    gamma: Float = 0.9,     // Discount factor [0.0-1.0]
    rar: Float = 0.5,       // Random action rate (epsilon)
    radr: Float = 0.99,     // Exploration decay rate
    dyna: Int = 0,          // Planning steps (0 = disabled)
    verbose: Bool = false   // Debug output
)
```
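
With `dyna > 0`, classic Dyna-Q follows each real update with `dyna` simulated updates replayed from a learned model of the environment. A conceptual sketch of that scheme (not QLearnerKit's internals, which may differ):

```swift
// One round of Dyna-Q planning over a tabular model
struct Experience: Hashable { let state: Int; let action: Int }

func dynaPlanning(
    q: inout [[Float]],                            // Q-table [state][action]
    model: [Experience: (sPrime: Int, r: Float)],  // observed transitions
    steps: Int, alpha: Float, gamma: Float
) {
    guard !model.isEmpty else { return }
    for _ in 0..<steps {
        // Replay a previously observed (state, action) pair
        let (exp, outcome) = model.randomElement()!
        let target = outcome.r + gamma * q[outcome.sPrime].max()!
        q[exp.state][exp.action] += alpha * (target - q[exp.state][exp.action])
    }
}
```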

### Core Methods

#### `querysetstate(_ s: Int) -> Int`
Initialize state without learning. Use at episode start.

**Parameters:**
- `s`: Initial state

**Returns:** Selected action

#### `query(_ sPrime: Int, reward: Float) -> Int`
Main learning step. Updates Q-table and selects next action.

**Parameters:**
- `sPrime`: New state after previous action
- `reward`: Reward received

**Returns:** Next action to take
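
For reference, `alpha` and `gamma` enter through the standard tabular Q-learning update (QLearnerKit's exact implementation details may differ):

```math
Q(s, a) \leftarrow Q(s, a) + \alpha \left[ r + \gamma \max_{a'} Q(s', a') - Q(s, a) \right]
```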

#### `getPolicy() -> [Int]`
Extract greedy policy (best action per state).

**Returns:** Array of actions indexed by state

#### `getQTable() -> MLXArray`
Get Q-table for inspection or persistence.

**Returns:** Q-table as MLXArray

## Use Cases

- **Content Recommendation** - Learn user preferences for news, videos, products
- **UI Personalization** - Adapt interface layouts based on user behavior
- **Feature Discovery** - Guide users to relevant app features
- **Notification Timing** - Learn optimal times to send notifications
- **A/B Testing** - Multi-armed bandit for dynamic feature testing

## Best Practices

### State Design
- Keep the state space small (< 1000 states) for fast learning
- Use meaningful features (time, context, user segments)
- Discretize continuous features into bins, as sketched below
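
A minimal binning sketch: map a continuous feature (here, a hypothetical session length in minutes) onto a small set of discrete bins that can become part of the state index:

```swift
// Discretize a continuous feature into a fixed number of bins
func bin(_ value: Double, min: Double, max: Double, bins: Int) -> Int {
    let clamped = Swift.min(Swift.max(value, min), max)
    let fraction = (clamped - min) / (max - min)
    // Map [0, 1] onto bin indices [0, bins - 1]
    return Swift.min(Int(fraction * Double(bins)), bins - 1)
}

// e.g. a 12.5-minute session, with 0-60 minutes split into 4 bins -> bin 0
let sessionBin = bin(12.5, min: 0, max: 60, bins: 4)
```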

### Reward Shaping
- Use clear positive/negative signals
- Scale rewards consistently (-1 to +1), as in the helper below
- Reward desired behaviors immediately
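
A small helper that keeps reward scaling consistent, mapping raw read duration onto [-1, 1]; the thresholds mirror the `NewsRecommender` example above and are illustrative, not part of the library:

```swift
import Foundation

// Map raw read duration (seconds) to a reward in [-1, 1]
func engagementReward(readDuration: TimeInterval) -> Float {
    switch readDuration {
    case ..<5: return -0.2   // ignored
    case ..<30: return 0.1   // glanced
    case ..<60: return 0.5   // read
    default: return 1.0      // deep engagement
    }
}
```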

### Hyperparameters
- **Production**: `rar: 0.1-0.3` (low exploration)
- **Learning**: `rar: 0.5-0.7` (high exploration)
- **Alpha**: `0.1-0.3` for stable environments
- **Dyna**: `50-200` for sample efficiency
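
Put together, that guidance translates into configurations like these (values picked from the ranges above; tune for your app):

```swift
// Early learning phase: explore aggressively
let learningPhase = QLearner(
    numStates: 8, numActions: 4,
    alpha: 0.2, gamma: 0.9,
    rar: 0.6, radr: 0.995, dyna: 100
)

// Production: mostly exploit what has been learned
let productionPhase = QLearner(
    numStates: 8, numActions: 4,
    alpha: 0.1, gamma: 0.9,
    rar: 0.2, radr: 0.99, dyna: 50
)
```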

### Privacy
- All learning happens on-device
- No user data is sent to servers
- The Q-table is stored locally

## Performance

- Lightweight: ~100KB of memory for 100 states × 10 actions
- Fast updates: < 1ms per query on Apple Silicon
- Efficient planning: Dyna-Q can improve sample efficiency by roughly 5-10×

## License

Apache-2.0

## Author

Timothy Bradford

## Contributing

Contributions welcome! Please open an issue or PR.