Skip to content

Commit bb2e37b

Browse files
authored
Merge pull request #35 from Rich-T-kid/pre-release
Pre release
2 parents a86cc84 + 488bf2b commit bb2e37b

42 files changed

Lines changed: 10401 additions & 168 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ src/Backend/test_data/json
107107
# Allow a specific CSV dataset that we want tracked despite the general csv ignores
108108
!src/Backend/test_data/csv/
109109
!src/Backend/test_data/csv/Mental_Health_and_Social_Media_Balance_Dataset.csv
110+
!src/Backend/test_data/csv/intergration_test_data_1.csv
111+
!src/Backend/test_data/csv/intergration_test_data_2.csv
110112
# allow parquet file
111113
!src/Backend/test_data/parquet/
112114
!src/Backend/test_data/parquet/capitals_clean.parquet

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ Initial development is done in **Go** (`opti-sql-go`), which serves as the prima
8686
- `/operators` - SQL operator implementations (filter, join, aggregation, project)
8787
- `/physical-optimizer` - Query plan parsing and optimization
8888
- `/substrait` - Substrait plan integration
89+
- `/operators/OPERATORS.md` - concise reference for operator constructors, behavior and examples
8990

9091
## Branching Model
9192

src/Backend/opti-sql-go/Expr/expr.go

Lines changed: 100 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ func EvalExpression(expr Expression, batch *operators.RecordBatch) (arrow.Array,
105105
return EvalScalarFunction(e, batch)
106106
case *CastExpr:
107107
return EvalCast(e, batch)
108+
case *NullCheckExpr:
109+
return EvalNullCheckMask(e.Expr, batch)
108110
default:
109111
return nil, ErrUnsupportedExpression(expr.String())
110112
}
@@ -146,6 +148,8 @@ func ExprDataType(e Expression, inputSchema *arrow.Schema) (arrow.DataType, erro
146148
return nil, err
147149
}
148150
return inferScalarFunctionType(ex.Function, argType), nil
151+
case *NullCheckExpr:
152+
return arrow.FixedWidthTypes.Boolean, nil
149153

150154
default:
151155
return nil, ErrUnsupportedExpression(ex.String())
@@ -215,7 +219,50 @@ type LiteralResolve struct {
215219
}
216220

217221
func NewLiteralResolve(Type arrow.DataType, Value any) *LiteralResolve {
218-
return &LiteralResolve{Type: Type, Value: Value}
222+
var castVal any
223+
224+
switch v := Value.(type) {
225+
226+
// ------------------------------------------------------
227+
// INT → cast based on Arrow integer type
228+
// ------------------------------------------------------
229+
case int:
230+
switch Type.ID() {
231+
case arrow.INT8:
232+
castVal = int8(v)
233+
case arrow.INT16:
234+
castVal = int16(v)
235+
case arrow.INT32:
236+
castVal = int32(v)
237+
case arrow.INT64:
238+
castVal = int64(v)
239+
case arrow.UINT8:
240+
castVal = uint8(v)
241+
case arrow.UINT16:
242+
castVal = uint16(v)
243+
case arrow.UINT32:
244+
castVal = uint32(v)
245+
case arrow.UINT64:
246+
castVal = uint64(v)
247+
default:
248+
// not an integer Arrow type → store original
249+
castVal = v
250+
}
251+
case string:
252+
castVal = string(v)
253+
case bool:
254+
castVal = bool(v)
255+
case float64:
256+
switch Type.ID() {
257+
case arrow.FLOAT32:
258+
castVal = float32(v)
259+
case arrow.FLOAT64:
260+
castVal = float64(v)
261+
}
262+
default:
263+
castVal = Value
264+
}
265+
return &LiteralResolve{Type: Type, Value: castVal}
219266
}
220267
func EvalLiteral(l *LiteralResolve, batch *operators.RecordBatch) (arrow.Array, error) {
221268
n := int(batch.RowCount)
@@ -355,6 +402,16 @@ func EvalLiteral(l *LiteralResolve, batch *operators.RecordBatch) (arrow.Array,
355402
b.Append(v)
356403
}
357404
return b.NewArray(), nil
405+
// ------------------------------
406+
// Nulls
407+
// ------------------------------
408+
case arrow.NULL:
409+
b := array.NewNullBuilder(memory.DefaultAllocator)
410+
defer b.Release()
411+
for i := 0; i < n; i++ {
412+
b.AppendNull()
413+
}
414+
return b.NewArray(), nil
358415

359416
default:
360417
return nil, fmt.Errorf("literal type %s not supported", l.Type)
@@ -389,37 +446,36 @@ func EvalBinary(b *BinaryExpr, batch *operators.RecordBatch) (arrow.Array, error
389446
if err != nil {
390447
return nil, err
391448
}
449+
ctx := context.Background()
392450
opt := compute.ArithmeticOptions{}
393451
switch b.Op {
394452
// arithmetic
395453
case Addition:
396-
datum, err := compute.Add(context.TODO(), opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr))
454+
datum, err := compute.Add(ctx, opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr))
397455
if err != nil {
398456
return nil, err
399457
}
400458
return unpackDatum(datum)
401459
case Subtraction:
402-
datum, err := compute.Subtract(context.TODO(), opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr))
460+
datum, err := compute.Subtract(ctx, opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr))
403461
if err != nil {
404462
return nil, err
405463
}
406464
return unpackDatum(datum)
407465

408466
case Multiplication:
409-
datum, err := compute.Multiply(context.TODO(), opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr))
467+
datum, err := compute.Multiply(ctx, opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr))
410468
if err != nil {
411469
return nil, err
412470
}
413471
return unpackDatum(datum)
414472
case Division:
415-
datum, err := compute.Divide(context.TODO(), opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr))
473+
datum, err := compute.Divide(ctx, opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr))
416474
if err != nil {
417475
return nil, err
418476
}
419477
return unpackDatum(datum)
420478

421-
// comparisions TODO:
422-
// These return a boolean array
423479
case Equal:
424480
if leftArr.DataType() != rightArr.DataType() {
425481
return nil, ErrCantCompareDifferentTypes(leftArr.DataType(), rightArr.DataType())
@@ -495,15 +551,13 @@ func EvalBinary(b *BinaryExpr, batch *operators.RecordBatch) (arrow.Array, error
495551
return unpackDatum(datum)
496552
case Like:
497553
if leftArr.DataType() != arrow.BinaryTypes.String || rightArr.DataType() != arrow.BinaryTypes.String {
498-
// regEx runs only on strings
499554
return nil, errors.New("binary operator Like only works on arrays of strings")
500555
}
501556
var compiledRegEx = compileSqlRegEx(rightArr.ValueStr(0))
502557
filterBuilder := array.NewBooleanBuilder(memory.NewGoAllocator())
503558
leftStrArray := leftArr.(*array.String)
504559
for i := 0; i < leftStrArray.Len(); i++ {
505560
valid := validRegEx(leftStrArray.Value(i), compiledRegEx)
506-
fmt.Printf("does %s match %s: %v\n", leftStrArray.Value(i), compiledRegEx, valid)
507561
filterBuilder.Append(valid)
508562
}
509563
return filterBuilder.NewArray(), nil
@@ -536,6 +590,7 @@ func NewScalarFunction(function supportedFunctions, Argument Expression) *Scalar
536590
}
537591

538592
func EvalScalarFunction(s *ScalarFunction, batch *operators.RecordBatch) (arrow.Array, error) {
593+
ctx := context.Background()
539594
switch s.Function {
540595
case Upper:
541596
arr, err := EvalExpression(s.Arguments, batch)
@@ -555,7 +610,7 @@ func EvalScalarFunction(s *ScalarFunction, batch *operators.RecordBatch) (arrow.
555610
if err != nil {
556611
return nil, err
557612
}
558-
datum, err := compute.AbsoluteValue(context.TODO(), compute.ArithmeticOptions{}, compute.NewDatum(arr))
613+
datum, err := compute.AbsoluteValue(ctx, compute.ArithmeticOptions{}, compute.NewDatum(arr))
559614
if err != nil {
560615
return nil, err
561616
}
@@ -565,7 +620,7 @@ func EvalScalarFunction(s *ScalarFunction, batch *operators.RecordBatch) (arrow.
565620
if err != nil {
566621
return nil, err
567622
}
568-
datum, err := compute.Round(context.TODO(), compute.DefaultRoundOptions, compute.NewDatum(arr))
623+
datum, err := compute.Round(ctx, compute.DefaultRoundOptions, compute.NewDatum(arr))
569624
if err != nil {
570625
return nil, err
571626
}
@@ -600,9 +655,8 @@ func EvalCast(c *CastExpr, batch *operators.RecordBatch) (arrow.Array, error) {
600655

601656
// Use Arrow compute kernel to cast
602657
castOpts := compute.SafeCastOptions(c.TargetType)
603-
out, err := compute.CastArray(context.TODO(), arr, castOpts)
658+
out, err := compute.CastArray(context.Background(), arr, castOpts)
604659
if err != nil {
605-
// This is a runtime cast error
606660
return nil, fmt.Errorf("cast error: cannot cast %s to %s: %w",
607661
arr.DataType(), c.TargetType, err)
608662
}
@@ -615,6 +669,39 @@ func (c *CastExpr) String() string {
615669
return fmt.Sprintf("Cast(%s AS %s)", c.Expr, c.TargetType)
616670
}
617671

672+
type NullCheckExpr struct {
673+
Expr Expression
674+
}
675+
676+
func NewNullCheckExpr(expr Expression) *NullCheckExpr {
677+
return &NullCheckExpr{Expr: expr}
678+
}
679+
func (n *NullCheckExpr) ExprNode() {}
680+
func (n *NullCheckExpr) String() string {
681+
return fmt.Sprintf("NullCheck(%s)", n.Expr.String())
682+
}
683+
func EvalNullCheckMask(expr Expression, batch *operators.RecordBatch) (arrow.Array, error) {
684+
// Step 1: Evaluate underlying expression
685+
arr, err := EvalExpression(expr, batch)
686+
if err != nil {
687+
return nil, err
688+
}
689+
690+
length := arr.Len()
691+
692+
// Step 2: Build boolean mask
693+
builder := array.NewBooleanBuilder(memory.DefaultAllocator)
694+
builder.Resize(length)
695+
696+
for i := 0; i < length; i++ {
697+
builder.Append(!arr.IsNull(i)) // true = not null
698+
}
699+
// Step 3: produce final Boolean array
700+
mask := builder.NewArray()
701+
builder.Release()
702+
return mask, nil
703+
}
704+
618705
func upperImpl(arr arrow.Array) (arrow.Array, error) {
619706
strArr, ok := arr.(*array.String)
620707
if !ok {

0 commit comments

Comments
 (0)