Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,12 @@ flycheck0/
# Cache directories
.cache/
node_modules/

# dont push large files to git
src/Backend/test_data/parquet
src/Backend/test_data/csv
src/Backend/test_data/json

# Allow s3_source directory
!src/Backend/test_data/s3_source/
!src/Backend/test_data/s3_source/**
7 changes: 2 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ OptiSQL is a custom in-memory query execution engine. The backend (physical exec
### Prerequisites
- Go 1.24+
- Rust 1.70+
- C++ (marco update this)
- C++23
- Make
- git

Expand Down Expand Up @@ -72,7 +72,6 @@ Initial development is done in **Go** (`opti-sql-go`), which serves as the prima
- `/operators` - SQL operator implementations (filter, join, aggregation, project)
- `/physical-optimizer` - Query plan parsing and optimization
- `/substrait` - Substrait plan integration
- `/project` - [Add description]

## Branching Model

Expand Down Expand Up @@ -130,6 +129,4 @@ Want to contribute? Check out [CONTRIBUTING.md](CONTRIBUTING.md) for detailed gu
- Build and run instructions

## License

This project is licensed under the terms specified in [LICENSE.txt](LICENSE.txt).

This project is licensed under the terms specified in [LICENSE.txt](LICENSE.txt).
2 changes: 0 additions & 2 deletions src/Backend/opti-sql-go/coverage.out

This file was deleted.

15 changes: 15 additions & 0 deletions src/Backend/opti-sql-go/go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
module opti-sql-go

go 1.24.0

require github.com/apache/arrow/go/v17 v17.0.0

require (
github.com/goccy/go-json v0.10.3 // indirect
github.com/google/flatbuffers v24.3.25+incompatible // indirect
github.com/klauspost/cpuid/v2 v2.2.8 // indirect
github.com/zeebo/xxh3 v1.0.2 // indirect
golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 // indirect
golang.org/x/mod v0.18.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/tools v0.22.0 // indirect
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
)
41 changes: 41 additions & 0 deletions src/Backend/opti-sql-go/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
github.com/apache/arrow/go/v17 v17.0.0 h1:RRR2bdqKcdbss9Gxy2NS/hK8i4LDMh23L6BbkN5+F54=
github.com/apache/arrow/go/v17 v17.0.0/go.mod h1:jR7QHkODl15PfYyjM2nU+yTLScZ/qfj7OSUZmJ8putc=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI=
github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM=
github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ=
golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc=
golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0=
golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA=
golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c=
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ=
gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
3 changes: 0 additions & 3 deletions src/Backend/opti-sql-go/main.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
package main

import "fmt"

func main() {
fmt.Println("Hello World")
}
276 changes: 275 additions & 1 deletion src/Backend/opti-sql-go/operators/record.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,277 @@
package operators

// This is what everything is going to be working off of
import (
"fmt"
"strings"

"github.com/apache/arrow/go/v17/arrow"
"github.com/apache/arrow/go/v17/arrow/array"
"github.com/apache/arrow/go/v17/arrow/memory"
)

var (
ErrInvalidSchema = func(info string) error {
return fmt.Errorf("invalid schema was provided. context: %s", info)
}
)

type RecordBatch struct {
Schema *arrow.Schema
Columns []arrow.Array
}

type SchemaBuilder struct {
fields []arrow.Field
}

type RecordBatchBuilder struct {
SchemaBuilder *SchemaBuilder
}

func NewRecordBatchBuilder() *RecordBatchBuilder {
return &RecordBatchBuilder{
SchemaBuilder: &SchemaBuilder{
fields: make([]arrow.Field, 0, 10),
},
}
}

func (sb *SchemaBuilder) WithField(name string, dtype arrow.DataType, nullable bool) *SchemaBuilder {
sb.fields = append(sb.fields, arrow.Field{
Name: name,
Type: dtype,
Nullable: nullable,
})
return sb
}
func (sb *SchemaBuilder) WithoutField(names ...string) *SchemaBuilder {
nameSet := make(map[string]struct{}, len(names))
for _, n := range names {
nameSet[n] = struct{}{}
}

newFields := make([]arrow.Field, 0, len(sb.fields))
for _, field := range sb.fields {
_, found := nameSet[field.Name]
if !found {
newFields = append(newFields, field)
}
}
sb.fields = newFields
return sb

}

func (sb *SchemaBuilder) Build() *arrow.Schema {
return arrow.NewSchema(sb.fields, nil)
}
func (rbb *RecordBatchBuilder) Schema() *arrow.Schema {
return arrow.NewSchema(rbb.SchemaBuilder.fields, nil)
}

// schema is always right in case of type mismatches
func (rbb *RecordBatchBuilder) validate(schema *arrow.Schema, columns []arrow.Array) error {
if len(schema.Fields()) != len(columns) {
return ErrInvalidSchema("schema fields and column count do not match")
}
// make sure that the array data types line up with whats expected of the schema
// Ensure array data types align with schema expectations.
var errors []string
for i := 0; i < len(columns); i++ {
field := schema.Field(i)
colType := columns[i].DataType()

if !arrow.TypeEqual(colType, field.Type) {
errors = append(errors,
fmt.Sprintf("Type mismatch at position %d: column '%s' has type '%s', but schema expects '%s'.",
i, field.Name, colType, field.Type))
}
}
if len(errors) > 0 {
return ErrInvalidSchema(strings.Join(errors, " "))
}
return nil
}
func (rbb *RecordBatchBuilder) NewRecordBatch(schema *arrow.Schema, columns []arrow.Array) (*RecordBatch, error) {
if err := rbb.validate(schema, columns); err != nil {
return nil, err
}
return &RecordBatch{
Schema: schema,
Columns: columns,
}, nil
}
func (rb *RecordBatch) DeepEqual(other *RecordBatch) bool {
if !rb.Schema.Equal(other.Schema) {
return false
}
if len(rb.Columns) != len(other.Columns) {
return false
}
for i := 0; i < len(rb.Columns); i++ {
if !array.Equal(rb.Columns[i], other.Columns[i]) {
return false
}
}
return true
}
func (rbb *RecordBatchBuilder) GenIntArray(values ...int) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewInt32Builder(mem)
defer builder.Release()
for _, v := range values {
builder.Append(int32(v))
}
return builder.NewArray()
}

func (rbb *RecordBatchBuilder) GenFloatArray(values ...float64) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewFloat64Builder(mem)
defer builder.Release()
for _, v := range values {
builder.Append(v)
}
return builder.NewArray()
}

func (rbb *RecordBatchBuilder) GenStringArray(values ...string) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewStringBuilder(mem)
defer builder.Release()
for _, v := range values {
builder.Append(v)
}
return builder.NewArray()
}

func (rbb *RecordBatchBuilder) GenBoolArray(values ...bool) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewBooleanBuilder(mem)
defer builder.Release()
for _, v := range values {
builder.Append(v)
}
return builder.NewArray()
}

// GenInt8Array generates an Int8 array
func (rbb *RecordBatchBuilder) GenInt8Array(values ...int8) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewInt8Builder(mem)
defer builder.Release()
for _, v := range values {
builder.Append(v)
}
return builder.NewArray()
}

// GenInt16Array generates an Int16 array
func (rbb *RecordBatchBuilder) GenInt16Array(values ...int16) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewInt16Builder(mem)
defer builder.Release()
for _, v := range values {
builder.Append(v)
}
return builder.NewArray()
}

// GenInt64Array generates an Int64 array
func (rbb *RecordBatchBuilder) GenInt64Array(values ...int64) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewInt64Builder(mem)
defer builder.Release()
for _, v := range values {
builder.Append(v)
}
return builder.NewArray()
}

// GenUint8Array generates a Uint8 array
func (rbb *RecordBatchBuilder) GenUint8Array(values ...uint8) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewUint8Builder(mem)
defer builder.Release()
for _, v := range values {
builder.Append(v)
}
return builder.NewArray()
}

// GenUint16Array generates a Uint16 array
func (rbb *RecordBatchBuilder) GenUint16Array(values ...uint16) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewUint16Builder(mem)
defer builder.Release()
for _, v := range values {
builder.Append(v)
}
return builder.NewArray()
}

// GenUint32Array generates a Uint32 array
func (rbb *RecordBatchBuilder) GenUint32Array(values ...uint32) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewUint32Builder(mem)
defer builder.Release()
for _, v := range values {
builder.Append(v)
}
return builder.NewArray()
}

// GenUint64Array generates a Uint64 array
func (rbb *RecordBatchBuilder) GenUint64Array(values ...uint64) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewUint64Builder(mem)
defer builder.Release()
for _, v := range values {
builder.Append(v)
}
return builder.NewArray()
}

// GenFloat32Array generates a Float32 array
func (rbb *RecordBatchBuilder) GenFloat32Array(values ...float32) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewFloat32Builder(mem)
defer builder.Release()
for _, v := range values {
builder.Append(v)
}
return builder.NewArray()
}

// GenBinaryArray generates a Binary array
func (rbb *RecordBatchBuilder) GenBinaryArray(values ...[]byte) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
defer builder.Release()
for _, v := range values {
builder.Append(v)
}
return builder.NewArray()
}

// GenLargeStringArray generates a LargeString array
func (rbb *RecordBatchBuilder) GenLargeStringArray(values ...string) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewLargeStringBuilder(mem)
defer builder.Release()
for _, v := range values {
builder.Append(v)
}
return builder.NewArray()
}

// GenLargeBinaryArray generates a LargeBinary array
func (rbb *RecordBatchBuilder) GenLargeBinaryArray(values ...[]byte) arrow.Array {
mem := memory.NewGoAllocator()
builder := array.NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary)
defer builder.Release()
for _, v := range values {
builder.Append(v)
}
return builder.NewArray()
}
Loading
Loading