@@ -2914,6 +2914,10 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
29142914 // Note: wcharEncoding parameter is reserved for future use
29152915 // Currently WCHAR data always uses UTF-16LE for Windows compatibility
29162916 (void )wcharEncoding; // Suppress unused parameter warning
2917+ #if !defined(__APPLE__) && !defined(__linux__)
2918+ // On Windows, VARCHAR is fetched as SQL_C_WCHAR, so charEncoding is unused.
2919+ (void )charEncoding;
2920+ #endif
29172921
29182922 LOG (" SQLGetData: Getting data from %d columns for statement_handle=%p" , colCount,
29192923 (void *)StatementHandle->get ());
@@ -2949,6 +2953,8 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
29492953 case SQL_CHAR:
29502954 case SQL_VARCHAR:
29512955 case SQL_LONGVARCHAR: {
2956+ #if defined(__APPLE__) || defined(__linux__)
2957+ // On Linux/macOS, the ODBC driver returns UTF-8 for SQL_C_CHAR.
29522958 if (columnSize == SQL_NO_TOTAL || columnSize == 0 ||
29532959 columnSize > SQL_MAX_LOB_SIZE) {
29542960 LOG (" SQLGetData: Streaming LOB for column %d (SQL_C_CHAR) "
@@ -2957,34 +2963,16 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
29572963 row.append (
29582964 FetchLobColumnData (hStmt, i, SQL_C_CHAR, false , false , charEncoding));
29592965 } else {
2960- // Allocate columnSize * 4 + 1 on ALL platforms (no #if guard).
2961- //
2962- // Why this differs from SQLBindColums / FetchBatchData:
2963- // Those two functions use #if to apply *4 only on Linux/macOS,
2964- // because on Windows with a non-UTF-8 collation (e.g. CP1252)
2965- // each character occupies exactly 1 byte, so *1 suffices and
2966- // saves memory across the entire batch (fetchSize × numCols
2967- // buffers).
2968- //
2969- // SQLGetData_wrap allocates a single temporary buffer per
2970- // column per row, so the over-allocation cost is negligible.
2971- // Using *4 unconditionally here keeps the code simple and
2972- // correct on every platform—including Windows with a UTF-8
2973- // collation where multi-byte chars could otherwise cause
2974- // truncation at the exact column boundary (e.g. CP1252 é in
2975- // VARCHAR(10)).
2966+ // Allocate columnSize * 4 + 1 to accommodate UTF-8 expansion.
29762967 uint64_t fetchBufferSize = columnSize * 4 + 1 /* null-termination */ ;
29772968 std::vector<SQLCHAR> dataBuffer (fetchBufferSize);
29782969 SQLLEN dataLen;
29792970 ret = SQLGetData_ptr (hStmt, i, SQL_C_CHAR, dataBuffer.data (), dataBuffer.size (),
29802971 &dataLen);
29812972 if (SQL_SUCCEEDED (ret)) {
2982- // columnSize is in chars, dataLen is in bytes
29832973 if (dataLen > 0 ) {
29842974 uint64_t numCharsInData = dataLen / sizeof (SQLCHAR);
29852975 if (numCharsInData < dataBuffer.size ()) {
2986- // SQLGetData will null-terminate the data
2987- // Use Python's codec system to decode bytes.
29882976 const std::string decodeEncoding =
29892977 GetEffectiveCharDecoding (charEncoding);
29902978 py::bytes raw_bytes (reinterpret_cast <char *>(dataBuffer.data ()),
@@ -3001,11 +2989,9 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
30012989 LOG_ERROR (
30022990 " SQLGetData: Failed to decode CHAR column %d with '%s': %s" ,
30032991 i, decodeEncoding.c_str (), e.what ());
3004- // Return raw bytes as fallback
30052992 row.append (raw_bytes);
30062993 }
30072994 } else {
3008- // Buffer too small, fallback to streaming
30092995 LOG (" SQLGetData: CHAR column %d data truncated "
30102996 " (buffer_size=%zu), using streaming LOB" ,
30112997 i, dataBuffer.size ());
@@ -3037,6 +3023,66 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
30373023 row.append (py::none ());
30383024 }
30393025 }
3026+ #else
3027+ // On Windows, request SQL_C_WCHAR so the ODBC driver converts
3028+ // from the server's native encoding (e.g. CP1252) to UTF-16.
3029+ // This avoids the need to guess the server's code page and
3030+ // eliminates the bytes-vs-str inconsistency.
3031+ if (columnSize == SQL_NO_TOTAL || columnSize == 0 ||
3032+ columnSize > SQL_MAX_LOB_SIZE) {
3033+ LOG (" SQLGetData: Streaming LOB for column %d (VARCHAR as SQL_C_WCHAR) "
3034+ " - columnSize=%lu" ,
3035+ i, (unsigned long )columnSize);
3036+ row.append (FetchLobColumnData (hStmt, i, SQL_C_WCHAR, true , false , " utf-16le" ));
3037+ } else {
3038+ uint64_t fetchBufferSize =
3039+ (columnSize + 1 ) * sizeof (SQLWCHAR); // +1 for null terminator
3040+ std::vector<SQLWCHAR> dataBuffer (columnSize + 1 );
3041+ SQLLEN dataLen;
3042+ ret = SQLGetData_ptr (hStmt, i, SQL_C_WCHAR, dataBuffer.data (), fetchBufferSize,
3043+ &dataLen);
3044+ if (SQL_SUCCEEDED (ret)) {
3045+ if (dataLen > 0 ) {
3046+ uint64_t numCharsInData = dataLen / sizeof (SQLWCHAR);
3047+ if (numCharsInData < dataBuffer.size ()) {
3048+ std::wstring wstr (reinterpret_cast <wchar_t *>(dataBuffer.data ()));
3049+ row.append (py::cast (wstr));
3050+ LOG (" SQLGetData: VARCHAR column %d decoded via SQL_C_WCHAR, "
3051+ " length=%lu" ,
3052+ i, (unsigned long )numCharsInData);
3053+ } else {
3054+ LOG (" SQLGetData: VARCHAR column %d data truncated "
3055+ " (as WCHAR), using streaming LOB" ,
3056+ i);
3057+ row.append (FetchLobColumnData (hStmt, i, SQL_C_WCHAR, true , false ,
3058+ " utf-16le" ));
3059+ }
3060+ } else if (dataLen == SQL_NULL_DATA) {
3061+ LOG (" SQLGetData: Column %d is NULL (VARCHAR via WCHAR)" , i);
3062+ row.append (py::none ());
3063+ } else if (dataLen == 0 ) {
3064+ row.append (py::str (" " ));
3065+ } else if (dataLen == SQL_NO_TOTAL) {
3066+ LOG (" SQLGetData: Cannot determine data length "
3067+ " (SQL_NO_TOTAL) for column %d (VARCHAR via WCHAR), "
3068+ " returning NULL" ,
3069+ i);
3070+ row.append (py::none ());
3071+ } else if (dataLen < 0 ) {
3072+ LOG (" SQLGetData: Unexpected negative data length "
3073+ " for column %d (VARCHAR via WCHAR) - dataLen=%ld" ,
3074+ i, (long )dataLen);
3075+ ThrowStdException (" SQLGetData returned an unexpected negative "
3076+ " data length" );
3077+ }
3078+ } else {
3079+ LOG (" SQLGetData: Error retrieving data for column %d "
3080+ " (VARCHAR via WCHAR) - SQLRETURN=%d, returning NULL" ,
3081+ i, ret);
3082+ row.append (py::none ());
3083+ }
3084+ }
3085+ #endif
30403086 break ;
30413087 }
30423088 case SQL_SS_XML: {
@@ -3487,29 +3533,26 @@ SQLRETURN SQLBindColums(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& column
34873533 // TODO: handle variable length data correctly. This logic won't
34883534 // suffice
34893535 HandleZeroColumnSizeAtFetch (columnSize);
3490- // Use columnSize * 4 + 1 on Linux/macOS to accommodate UTF-8
3491- // expansion. The ODBC driver returns UTF-8 for SQL_C_CHAR where
3492- // each character can be up to 4 bytes.
34933536#if defined(__APPLE__) || defined(__linux__)
3537+ // On Linux/macOS, the ODBC driver returns UTF-8 for SQL_C_CHAR
3538+ // where each character can be up to 4 bytes.
34943539 uint64_t fetchBufferSize = columnSize * 4 + 1 /* null-terminator*/ ;
3495- #else
3496- uint64_t fetchBufferSize = columnSize + 1 /* null-terminator*/ ;
3497- #endif
3498- // TODO: For LONGVARCHAR/BINARY types, columnSize is returned as
3499- // 2GB-1 by SQLDescribeCol. So fetchBufferSize = 2GB.
3500- // fetchSize=1 if columnSize>1GB. So we'll allocate a vector of
3501- // size 2GB. If a query fetches multiple (say N) LONG...
3502- // columns, we will have allocated multiple (N) 2GB sized
3503- // vectors. This will make driver very slow. And if the N is
3504- // high enough, we could hit the OS limit for heap memory that
3505- // we can allocate, & hence get a std::bad_alloc. The process
3506- // could also be killed by OS for consuming too much memory.
3507- // Hence this will be revisited in beta to not allocate 2GB+
3508- // memory, & use streaming instead
35093540 buffers.charBuffers [col - 1 ].resize (fetchSize * fetchBufferSize);
35103541 ret = SQLBindCol_ptr (hStmt, col, SQL_C_CHAR, buffers.charBuffers [col - 1 ].data (),
35113542 fetchBufferSize * sizeof (SQLCHAR),
35123543 buffers.indicators [col - 1 ].data ());
3544+ #else
3545+ // On Windows, the ODBC driver returns bytes in the server's
3546+ // native encoding (e.g., CP1252). Rather than guessing the
3547+ // code page, we request SQL_C_WCHAR so the driver performs
3548+ // the conversion to UTF-16 — exactly matching how NVARCHAR
3549+ // columns are already handled.
3550+ uint64_t fetchBufferSize = columnSize + 1 /* null-terminator*/ ;
3551+ buffers.wcharBuffers [col - 1 ].resize (fetchSize * fetchBufferSize);
3552+ ret = SQLBindCol_ptr (hStmt, col, SQL_C_WCHAR, buffers.wcharBuffers [col - 1 ].data (),
3553+ fetchBufferSize * sizeof (SQLWCHAR),
3554+ buffers.indicators [col - 1 ].data ());
3555+ #endif
35133556 break ;
35143557 }
35153558 case SQL_WCHAR:
@@ -3675,9 +3718,9 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
36753718 HandleZeroColumnSizeAtFetch (columnInfos[col].processedColumnSize );
36763719 // On Linux/macOS, the ODBC driver returns UTF-8 for SQL_C_CHAR where
36773720 // each character can be up to 4 bytes. Must match SQLBindColums buffer.
3678- #if defined(__APPLE__) || defined(__linux__)
36793721 SQLSMALLINT dt = columnInfos[col].dataType ;
36803722 bool isCharType = (dt == SQL_CHAR || dt == SQL_VARCHAR || dt == SQL_LONGVARCHAR);
3723+ #if defined(__APPLE__) || defined(__linux__)
36813724 if (isCharType) {
36823725 columnInfos[col].fetchBufferSize = columnInfos[col].processedColumnSize * 4 +
36833726 1 ; // *4 for UTF-8, +1 for null terminator
@@ -3686,6 +3729,10 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
36863729 columnInfos[col].processedColumnSize + 1 ; // +1 for null terminator
36873730 }
36883731#else
3732+ // On Windows, VARCHAR columns are fetched as SQL_C_WCHAR (see
3733+ // SQLBindColums). The fetchBufferSize is in SQLWCHAR elements,
3734+ // matching the wcharBuffers layout.
3735+ (void )isCharType; // same formula for all types on Windows
36893736 columnInfos[col].fetchBufferSize =
36903737 columnInfos[col].processedColumnSize + 1 ; // +1 for null terminator
36913738#endif
@@ -3740,7 +3787,14 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
37403787 case SQL_CHAR:
37413788 case SQL_VARCHAR:
37423789 case SQL_LONGVARCHAR:
3790+ #if defined(__APPLE__) || defined(__linux__)
37433791 columnProcessors[col] = ColumnProcessors::ProcessChar;
3792+ #else
3793+ // On Windows, VARCHAR columns are fetched as SQL_C_WCHAR
3794+ // (the driver converts from the server's native encoding to
3795+ // UTF-16), so we reuse the NVARCHAR processor.
3796+ columnProcessors[col] = ColumnProcessors::ProcessWChar;
3797+ #endif
37443798 break ;
37453799 case SQL_WCHAR:
37463800 case SQL_WVARCHAR:
@@ -4048,7 +4102,8 @@ size_t calculateRowSize(py::list& columnNames, SQLUSMALLINT numCols) {
40484102 break ;
40494103 case SQL_SS_UDT:
40504104 rowSize += (static_cast <SQLLEN>(columnSize) == SQL_NO_TOTAL || columnSize == 0 )
4051- ? SQL_MAX_LOB_SIZE : columnSize;
4105+ ? SQL_MAX_LOB_SIZE
4106+ : columnSize;
40524107 break ;
40534108 case SQL_BINARY:
40544109 case SQL_VARBINARY:
@@ -4112,8 +4167,7 @@ SQLRETURN FetchMany_wrap(SqlHandlePtr StatementHandle, py::list& rows, int fetch
41124167
41134168 if ((dataType == SQL_WVARCHAR || dataType == SQL_WLONGVARCHAR || dataType == SQL_VARCHAR ||
41144169 dataType == SQL_LONGVARCHAR || dataType == SQL_VARBINARY ||
4115- dataType == SQL_LONGVARBINARY || dataType == SQL_SS_XML ||
4116- dataType == SQL_SS_UDT) &&
4170+ dataType == SQL_LONGVARBINARY || dataType == SQL_SS_XML || dataType == SQL_SS_UDT) &&
41174171 (columnSize == 0 || columnSize == SQL_NO_TOTAL || columnSize > SQL_MAX_LOB_SIZE)) {
41184172 lobColumns.push_back (i + 1 ); // 1-based
41194173 }
@@ -4252,8 +4306,7 @@ SQLRETURN FetchAll_wrap(SqlHandlePtr StatementHandle, py::list& rows,
42524306
42534307 if ((dataType == SQL_WVARCHAR || dataType == SQL_WLONGVARCHAR || dataType == SQL_VARCHAR ||
42544308 dataType == SQL_LONGVARCHAR || dataType == SQL_VARBINARY ||
4255- dataType == SQL_LONGVARBINARY || dataType == SQL_SS_XML ||
4256- dataType == SQL_SS_UDT) &&
4309+ dataType == SQL_LONGVARBINARY || dataType == SQL_SS_XML || dataType == SQL_SS_UDT) &&
42574310 (columnSize == 0 || columnSize == SQL_NO_TOTAL || columnSize > SQL_MAX_LOB_SIZE)) {
42584311 lobColumns.push_back (i + 1 ); // 1-based
42594312 }
0 commit comments