Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions FuzzTesting/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.build/
Package.resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type":"transcript.text.delta","delta":"Hello"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type":"transcript.text.done","text":"Hello world","logprobs":[{"token":"Hello","bytes":[72,101,108,108,111],"logprob":-0.5},{"token":" world","bytes":[32,119,111,114,108,100],"logprob":-0.7}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":"chatcmpl-123","object":"chat.completion","created":1677652288,"model":"gpt-4","choices":[{"index":0,"message":{"role":"assistant","content":"Hello, world!","annotations":[],"tool_calls":[]},"finish_reason":"stop"}],"usage":{"prompt_tokens":9,"completion_tokens":12,"total_tokens":21},"system_fingerprint":"fp_fc9f1d7035"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":"chatcmpl-xyz","object":"chat.completion","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"message":{"role":"assistant","content":null,"refusal":null,"tool_calls":[{"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Kyiv\"}"}}]},"finish_reason":"tool_calls"}],"usage":{"prompt_tokens":80,"completion_tokens":24,"total_tokens":104}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":"chatcmpl-stream-1","object":"chat.completion.chunk","created":1738577084,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant","content":"Hi"},"logprobs":null,"finish_reason":null}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":"chatcmpl-stream-3","object":"chat.completion.chunk","created":1738577084,"model":"gpt-4o","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":"chatcmpl-stream-2","object":"chat.completion.chunk","created":1738577084,"model":"gpt-4o","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"id":"call_1","type":"function","function":{"name":"get_weather","arguments":"{\""}}]},"finish_reason":null}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":"resp_abc","object":"response","created_at":1700000000,"status":"completed","model":"gpt-4o","output":[{"id":"msg_1","type":"message","role":"assistant","status":"completed","content":[{"type":"output_text","text":"Hello","annotations":[]}]}],"parallel_tool_calls":true,"tool_choice":"auto","tools":[],"usage":{"input_tokens":10,"output_tokens":5,"total_tokens":15}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type":"response.function_call_arguments.done","item_id":"item-abc","name":"get_weather","output_index":1,"arguments":"{ \"arg\": 123 }","sequence_number":1}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type":"response.mcp_call_arguments.done","item_id":"mcp_1","output_index":0,"arguments":"{}","sequence_number":3}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type":"response.output_text.delta","item_id":"msg_1","output_index":0,"content_index":0,"delta":"Hi","sequence_number":1}
47 changes: 47 additions & 0 deletions FuzzTesting/Package.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// swift-tools-version: 5.10
//
// FuzzTesting/Package.swift
//
// A separate SwiftPM package for libFuzzer harnesses. Lives under
// `FuzzTesting/` so that the main `OpenAI` package manifest stays
// unchanged for library consumers.
//
// Pattern adapted from grpc-swift's FuzzTesting setup. See
// `FuzzTesting/README.md` for how to build and run.

import PackageDescription

// Every harness links against the `OpenAI` library product of the parent
// package, so the dependency is spelled out once and reused below.
let openAIProduct: Target.Dependency = .product(name: "OpenAI", package: "OpenAI")

// One executable target per fuzz harness. Each target's sources live in
// `Sources/<TargetName>`.
let harnessTargets: [Target] = [
    .executableTarget(
        name: "FuzzResponseStreamEventDecoder",
        dependencies: [openAIProduct],
        path: "Sources/FuzzResponseStreamEventDecoder"
    ),
    .executableTarget(
        name: "FuzzChatResultDecoder",
        dependencies: [openAIProduct],
        path: "Sources/FuzzChatResultDecoder"
    ),
    .executableTarget(
        name: "FuzzChatStreamResultDecoder",
        dependencies: [openAIProduct],
        path: "Sources/FuzzChatStreamResultDecoder"
    ),
    .executableTarget(
        name: "FuzzResponseObjectDecoder",
        dependencies: [openAIProduct],
        path: "Sources/FuzzResponseObjectDecoder"
    ),
    .executableTarget(
        name: "FuzzAudioTranscriptionStreamResultDecoder",
        dependencies: [openAIProduct],
        path: "Sources/FuzzAudioTranscriptionStreamResultDecoder"
    ),
]

// The manifest itself: a standalone package that depends on the `OpenAI`
// package located one directory up.
let package = Package(
    name: "FuzzTesting",
    platforms: [.macOS(.v10_15)],
    dependencies: [
        .package(name: "OpenAI", path: ".."),
    ],
    targets: harnessTargets
)
84 changes: 84 additions & 0 deletions FuzzTesting/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# FuzzTesting

libFuzzer harnesses for the `OpenAI` Swift package. Lives in its own
SwiftPM package so the main library's `Package.swift` stays clean for
library consumers.

This is the in-repo half of the work tracked in [#241 — Integrate
OSS-Fuzz](https://github.com/MacPaw/OpenAI/issues/241). The upstream
`google/oss-fuzz` project submission is a separate piece.

## Targets

| Target | What it fuzzes |
| ----------------------------------------------- | --------------------------------------------------------------- |
| `FuzzResponseStreamEventDecoder` | `JSONDecoder().decode(ResponseStreamEvent.self, …)` |
| `FuzzChatResultDecoder` | `JSONDecoder().decode(ChatResult.self, …)` |
| `FuzzChatStreamResultDecoder` | `JSONDecoder().decode(ChatStreamResult.self, …)` |
| `FuzzResponseObjectDecoder` | `JSONDecoder().decode(ResponseObject.self, …)` |
| `FuzzAudioTranscriptionStreamResultDecoder` | `JSONDecoder().decode(AudioTranscriptionStreamResult.self, …)` |

All five targets fuzz Codable decoders that consume bytes off the wire:

- `ResponseStreamEvent.init(from:)` runs several fallible decode passes
before falling back to a generated raw event with ~50 oneOf cases.
- `ChatResult` / `ChatStreamResult` cover the non-streaming and
streaming `/v1/chat/completions` response shapes, including tool
calls, annotations, refusals, and usage breakdowns.
- `ResponseObject` covers the non-streaming `/v1/responses` envelope
with reasoning, tool outputs, and rich output items.
- `AudioTranscriptionStreamResult` has a hand-written `init(from:)`
that branches on `type` and optionally reads `logprobs`.

## Replay mode (no libFuzzer toolchain required)

```sh
cd FuzzTesting
swift build
swift run FuzzResponseStreamEventDecoder FuzzCorpus/FuzzResponseStreamEventDecoder/seed-function-call-args-done.json
```

Replay mode runs the harness against a single input file and exits. Use
it to reproduce libFuzzer crash artifacts locally, or to smoke-test the
harness against seed inputs.

## Fuzzing mode (libFuzzer)

Requires a Swift toolchain built with libFuzzer support
([reference](https://github.com/apple/swift/blob/main/docs/libFuzzerIntegration.md)).
On Linux, the toolchains published on swift.org normally include the
fuzzer runtime; on macOS, the toolchain bundled with Xcode typically
does not, so use an open-source toolchain from swift.org instead.

```sh
cd FuzzTesting
swift build -c debug \
-Xswiftc -sanitize=fuzzer,address \
-Xswiftc -parse-as-library \
-Xswiftc -DFUZZING_ENABLED
./.build/debug/FuzzResponseStreamEventDecoder \
FuzzCorpus/FuzzResponseStreamEventDecoder \
-max_total_time=60
```

`-DFUZZING_ENABLED` removes each harness's `@main` replay entry point
so libFuzzer's own `main` (provided by `-sanitize=fuzzer`) drives
`LLVMFuzzerTestOneInput`.

## Adding a corpus input

Drop a file containing any byte sequence into `FuzzCorpus/<TargetName>/`.
Real-world response chunks captured during integration testing make the
best seeds. The fuzzer uses these files as starting points for its
mutations.

## Adding a new harness

1. Add an `.executableTarget` to `Package.swift`.
2. Create `Sources/<TargetName>/<TargetName>.swift` with the same shape
as the existing harnesses:
- One `@_cdecl("LLVMFuzzerTestOneInput")` function with the body
under test.
- A `#if !FUZZING_ENABLED` replay `@main` for local use.
3. Add a `FuzzCorpus/<TargetName>/` directory with at least one seed.

Pick targets that consume untrusted bytes: parsers, decoders, anything
on the network-input path.
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//
// FuzzAudioTranscriptionStreamResultDecoder.swift
// OpenAI / FuzzTesting
//
// libFuzzer harness for `AudioTranscriptionStreamResult` JSON decoding.
// The streamed audio transcription event has a hand-written
// `init(from:)` (see Public/Models/AudioTranscriptionStreamResult.swift)
// that branches on `type` and optionally reads `logprobs`, so it has a
// decent surface for malformed inputs.
//
// See FuzzTesting/README.md.

import Foundation
import OpenAI

/// Single `JSONDecoder` instance reused by every call to `decodeOnce`.
private let sharedDecoder = JSONDecoder()

/// Attempts one decode of `data` as `AudioTranscriptionStreamResult`.
///
/// Both the decoded value and any thrown error are discarded; the function
/// returns normally whether or not the input was accepted.
///
/// - Parameter data: Raw bytes to hand to the JSON decoder.
@inline(__always)
private func decodeOnce(_ data: Data) {
    do {
        _ = try sharedDecoder.decode(AudioTranscriptionStreamResult.self, from: data)
    } catch {
        // A rejected input is an expected outcome; there is nothing to report.
    }
}

/// Fuzzer entry point, exported under the C symbol `LLVMFuzzerTestOneInput`.
///
/// - Parameters:
///   - start: Pointer to the first byte of the input.
///   - count: Number of bytes readable from `start`.
/// - Returns: Always `0`.
@_cdecl("LLVMFuzzerTestOneInput")
public func LLVMFuzzerTestOneInput(_ start: UnsafePointer<UInt8>, _ count: Int) -> Int32 {
    // Copy the input into an owned `Data` value before decoding it.
    let input = Data(bytes: UnsafeRawPointer(start), count: count)
    decodeOnce(input)
    return 0
}

#if !FUZZING_ENABLED
/// Replay entry point, compiled only when `FUZZING_ENABLED` is not defined.
///
/// Takes the path of a single input file on the command line and runs one
/// decode pass over its contents.
@main
enum Replay {
    /// Reads the file named by the sole command-line argument and decodes it once.
    ///
    /// Exit status:
    /// - `2` when the argument count is wrong (usage is printed to stderr);
    /// - `1` when the input file cannot be read (the reason is printed to stderr).
    ///
    /// The read failure is handled here instead of being thrown out of `main`:
    /// an error escaping `main` terminates the process with a runtime trap,
    /// which is easy to mistake for a crash in the code under test.
    static func main() throws {
        let args = CommandLine.arguments
        guard args.count == 2 else {
            FileHandle.standardError.write(Data("usage: \(args.first ?? "FuzzAudioTranscriptionStreamResultDecoder") <input-file>\n".utf8))
            exit(2)
        }
        let data: Data
        do {
            data = try Data(contentsOf: URL(fileURLWithPath: args[1]))
        } catch {
            FileHandle.standardError.write(Data("error: cannot read \(args[1]): \(error.localizedDescription)\n".utf8))
            exit(1)
        }
        decodeOnce(data)
        print("decoded \(data.count) bytes without crashing")
    }
}
#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//
// FuzzChatResultDecoder.swift
// OpenAI / FuzzTesting
//
// libFuzzer harness for `ChatResult` JSON decoding. `ChatResult` is the
// non-streaming response body for `/v1/chat/completions` — bytes come
// straight off the wire and pass through a Codable initializer that
// recursively decodes message content, tool calls, annotations, and
// usage details, any of which can have unexpected shapes.
//
// See FuzzTesting/README.md.

import Foundation
import OpenAI

/// Single `JSONDecoder` instance reused by every call to `decodeOnce`.
private let sharedDecoder = JSONDecoder()

/// Attempts one decode of `data` as `ChatResult`.
///
/// Both the decoded value and any thrown error are discarded; the function
/// returns normally whether or not the input was accepted.
///
/// - Parameter data: Raw bytes to hand to the JSON decoder.
@inline(__always)
private func decodeOnce(_ data: Data) {
    do {
        _ = try sharedDecoder.decode(ChatResult.self, from: data)
    } catch {
        // A rejected input is an expected outcome; there is nothing to report.
    }
}

/// Fuzzer entry point, exported under the C symbol `LLVMFuzzerTestOneInput`.
///
/// - Parameters:
///   - start: Pointer to the first byte of the input.
///   - count: Number of bytes readable from `start`.
/// - Returns: Always `0`.
@_cdecl("LLVMFuzzerTestOneInput")
public func LLVMFuzzerTestOneInput(_ start: UnsafePointer<UInt8>, _ count: Int) -> Int32 {
    // Copy the input into an owned `Data` value before decoding it.
    let input = Data(bytes: UnsafeRawPointer(start), count: count)
    decodeOnce(input)
    return 0
}

#if !FUZZING_ENABLED
/// Replay entry point, compiled only when `FUZZING_ENABLED` is not defined.
///
/// Takes the path of a single input file on the command line and runs one
/// decode pass over its contents.
@main
enum Replay {
    /// Reads the file named by the sole command-line argument and decodes it once.
    ///
    /// Exit status:
    /// - `2` when the argument count is wrong (usage is printed to stderr);
    /// - `1` when the input file cannot be read (the reason is printed to stderr).
    ///
    /// The read failure is handled here instead of being thrown out of `main`:
    /// an error escaping `main` terminates the process with a runtime trap,
    /// which is easy to mistake for a crash in the code under test.
    static func main() throws {
        let args = CommandLine.arguments
        guard args.count == 2 else {
            FileHandle.standardError.write(Data("usage: \(args.first ?? "FuzzChatResultDecoder") <input-file>\n".utf8))
            exit(2)
        }
        let data: Data
        do {
            data = try Data(contentsOf: URL(fileURLWithPath: args[1]))
        } catch {
            FileHandle.standardError.write(Data("error: cannot read \(args[1]): \(error.localizedDescription)\n".utf8))
            exit(1)
        }
        decodeOnce(data)
        print("decoded \(data.count) bytes without crashing")
    }
}
#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
//
// FuzzChatStreamResultDecoder.swift
// OpenAI / FuzzTesting
//
// libFuzzer harness for `ChatStreamResult` JSON decoding — the
// per-chunk type emitted by `/v1/chat/completions` when streaming.
// Each SSE `data:` line is decoded into this type, so any malformed
// byte sequence the parser tolerates ends up here.
//
// See FuzzTesting/README.md.

import Foundation
import OpenAI

/// Single `JSONDecoder` instance reused by every call to `decodeOnce`.
private let sharedDecoder = JSONDecoder()

/// Attempts one decode of `data` as `ChatStreamResult`.
///
/// Both the decoded value and any thrown error are discarded; the function
/// returns normally whether or not the input was accepted.
///
/// - Parameter data: Raw bytes to hand to the JSON decoder.
@inline(__always)
private func decodeOnce(_ data: Data) {
    do {
        _ = try sharedDecoder.decode(ChatStreamResult.self, from: data)
    } catch {
        // A rejected input is an expected outcome; there is nothing to report.
    }
}

/// Fuzzer entry point, exported under the C symbol `LLVMFuzzerTestOneInput`.
///
/// - Parameters:
///   - start: Pointer to the first byte of the input.
///   - count: Number of bytes readable from `start`.
/// - Returns: Always `0`.
@_cdecl("LLVMFuzzerTestOneInput")
public func LLVMFuzzerTestOneInput(_ start: UnsafePointer<UInt8>, _ count: Int) -> Int32 {
    // Copy the input into an owned `Data` value before decoding it.
    let input = Data(bytes: UnsafeRawPointer(start), count: count)
    decodeOnce(input)
    return 0
}

#if !FUZZING_ENABLED
/// Replay entry point, compiled only when `FUZZING_ENABLED` is not defined.
///
/// Takes the path of a single input file on the command line and runs one
/// decode pass over its contents.
@main
enum Replay {
    /// Reads the file named by the sole command-line argument and decodes it once.
    ///
    /// Exit status:
    /// - `2` when the argument count is wrong (usage is printed to stderr);
    /// - `1` when the input file cannot be read (the reason is printed to stderr).
    ///
    /// The read failure is handled here instead of being thrown out of `main`:
    /// an error escaping `main` terminates the process with a runtime trap,
    /// which is easy to mistake for a crash in the code under test.
    static func main() throws {
        let args = CommandLine.arguments
        guard args.count == 2 else {
            FileHandle.standardError.write(Data("usage: \(args.first ?? "FuzzChatStreamResultDecoder") <input-file>\n".utf8))
            exit(2)
        }
        let data: Data
        do {
            data = try Data(contentsOf: URL(fileURLWithPath: args[1]))
        } catch {
            FileHandle.standardError.write(Data("error: cannot read \(args[1]): \(error.localizedDescription)\n".utf8))
            exit(1)
        }
        decodeOnce(data)
        print("decoded \(data.count) bytes without crashing")
    }
}
#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
//
// FuzzResponseObjectDecoder.swift
// OpenAI / FuzzTesting
//
// libFuzzer harness for `ResponseObject` JSON decoding — the
// non-streaming response body for `/v1/responses`. Has a richer
// envelope than `ChatResult`, with reasoning blocks, tool outputs,
// output items of multiple kinds, and prompt references.
//
// See FuzzTesting/README.md.

import Foundation
import OpenAI

/// Single `JSONDecoder` instance reused by every call to `decodeOnce`.
private let sharedDecoder = JSONDecoder()

/// Attempts one decode of `data` as `ResponseObject`.
///
/// Both the decoded value and any thrown error are discarded; the function
/// returns normally whether or not the input was accepted.
///
/// - Parameter data: Raw bytes to hand to the JSON decoder.
@inline(__always)
private func decodeOnce(_ data: Data) {
    do {
        _ = try sharedDecoder.decode(ResponseObject.self, from: data)
    } catch {
        // A rejected input is an expected outcome; there is nothing to report.
    }
}

/// Fuzzer entry point, exported under the C symbol `LLVMFuzzerTestOneInput`.
///
/// - Parameters:
///   - start: Pointer to the first byte of the input.
///   - count: Number of bytes readable from `start`.
/// - Returns: Always `0`.
@_cdecl("LLVMFuzzerTestOneInput")
public func LLVMFuzzerTestOneInput(_ start: UnsafePointer<UInt8>, _ count: Int) -> Int32 {
    // Copy the input into an owned `Data` value before decoding it.
    let input = Data(bytes: UnsafeRawPointer(start), count: count)
    decodeOnce(input)
    return 0
}

#if !FUZZING_ENABLED
/// Replay entry point, compiled only when `FUZZING_ENABLED` is not defined.
///
/// Takes the path of a single input file on the command line and runs one
/// decode pass over its contents.
@main
enum Replay {
    /// Reads the file named by the sole command-line argument and decodes it once.
    ///
    /// Exit status:
    /// - `2` when the argument count is wrong (usage is printed to stderr);
    /// - `1` when the input file cannot be read (the reason is printed to stderr).
    ///
    /// The read failure is handled here instead of being thrown out of `main`:
    /// an error escaping `main` terminates the process with a runtime trap,
    /// which is easy to mistake for a crash in the code under test.
    static func main() throws {
        let args = CommandLine.arguments
        guard args.count == 2 else {
            FileHandle.standardError.write(Data("usage: \(args.first ?? "FuzzResponseObjectDecoder") <input-file>\n".utf8))
            exit(2)
        }
        let data: Data
        do {
            data = try Data(contentsOf: URL(fileURLWithPath: args[1]))
        } catch {
            FileHandle.standardError.write(Data("error: cannot read \(args[1]): \(error.localizedDescription)\n".utf8))
            exit(1)
        }
        decodeOnce(data)
        print("decoded \(data.count) bytes without crashing")
    }
}
#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
//
// FuzzResponseStreamEventDecoder.swift
// OpenAI / FuzzTesting
//
// libFuzzer harness for `ResponseStreamEvent` JSON decoding.
//
// `ResponseStreamEvent.init(from:)` has a non-trivial decoding pipeline
// with multiple `try?`/fallback paths (see Public/Schemas/Facade/
// ResponseStreamEvent.swift). It consumes untrusted bytes off the wire,
// which makes it a high-value target for fuzz testing.
//
// See FuzzTesting/README.md for how to build & run.

import Foundation
import OpenAI

/// Single `JSONDecoder` instance reused by every call to `decodeOnce`.
private let sharedDecoder = JSONDecoder()

/// Runs one fuzzer iteration: a single attempt to decode `data` as
/// `ResponseStreamEvent`.
///
/// Decode failures are swallowed on purpose. The harness is after crashes,
/// hangs, and sanitizer findings, so an input the decoder merely rejects is
/// not interesting.
///
/// - Parameter data: Raw bytes to hand to the JSON decoder.
@inline(__always)
private func decodeOnce(_ data: Data) {
    do {
        _ = try sharedDecoder.decode(ResponseStreamEvent.self, from: data)
    } catch {
        // A rejected input is an expected outcome; there is nothing to report.
    }
}

/// Fuzzer entry point, exported under the C symbol `LLVMFuzzerTestOneInput`.
///
/// - Parameters:
///   - start: Pointer to the first byte of the input.
///   - count: Number of bytes readable from `start`.
/// - Returns: Always `0`.
@_cdecl("LLVMFuzzerTestOneInput")
public func LLVMFuzzerTestOneInput(_ start: UnsafePointer<UInt8>, _ count: Int) -> Int32 {
    // Copy the input into an owned `Data` value before decoding it.
    let input = Data(bytes: UnsafeRawPointer(start), count: count)
    decodeOnce(input)
    return 0
}

#if !FUZZING_ENABLED
// Replay mode for local use without libFuzzer: takes a path to a single
// input file (e.g. a libFuzzer crash artifact) and runs one decode pass.
// libFuzzer's own driver is linked in when built with `-sanitize=fuzzer`,
// in which case this `@main` entry is excluded by defining the
// `FUZZING_ENABLED` flag at build time (`-Xswiftc -DFUZZING_ENABLED`; see
// FuzzTesting/README.md).
@main
enum Replay {
    /// Reads the file named by the sole command-line argument and decodes it once.
    ///
    /// Exit status:
    /// - `2` when the argument count is wrong (usage is printed to stderr);
    /// - `1` when the input file cannot be read (the reason is printed to stderr).
    ///
    /// The read failure is handled here instead of being thrown out of `main`:
    /// an error escaping `main` terminates the process with a runtime trap,
    /// which is easy to mistake for a crash in the code under test.
    static func main() throws {
        let args = CommandLine.arguments
        guard args.count == 2 else {
            FileHandle.standardError.write(Data("usage: \(args.first ?? "FuzzResponseStreamEventDecoder") <input-file>\n".utf8))
            exit(2)
        }
        let data: Data
        do {
            data = try Data(contentsOf: URL(fileURLWithPath: args[1]))
        } catch {
            FileHandle.standardError.write(Data("error: cannot read \(args[1]): \(error.localizedDescription)\n".utf8))
            exit(1)
        }
        decodeOnce(data)
        print("decoded \(data.count) bytes without crashing")
    }
}
#endif
Loading