From 568aace62edc75b9c21dd99b9065b81271019f46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Moura?= <joaomdmoura@gmail.com>
Date: Fri, 14 Mar 2025 07:48:39 -0700
Subject: [PATCH] fix: handle None/empty data_array in DatabricksQueryTool

---
 .../databricks_query_tool.py                  | 59 +++++++++++--------
 1 file changed, 35 insertions(+), 24 deletions(-)

diff --git a/src/crewai_tools/tools/databricks_query_tool/databricks_query_tool.py b/src/crewai_tools/tools/databricks_query_tool/databricks_query_tool.py
index 1761f56c0..24ed6e6a0 100644
--- a/src/crewai_tools/tools/databricks_query_tool/databricks_query_tool.py
+++ b/src/crewai_tools/tools/databricks_query_tool/databricks_query_tool.py
@@ -345,7 +345,12 @@ class DatabricksQueryTool(BaseTool):
                     # Dump the raw structure of result data to help troubleshoot
                     if hasattr(result.result, 'data_array'):
                         print(f"data_array structure: {type(result.result.data_array)}")
-                        if result.result.data_array and len(result.result.data_array) > 0:
+                        # Add defensive check for None data_array
+                        if result.result.data_array is None:
+                            print("data_array is None - likely an empty result set or DDL query")
+                            # Return early with a success message instead of processing a None data_array
+                            return "Query executed successfully (no data returned)"
+                        elif result.result.data_array and len(result.result.data_array) > 0:
                             print(f"First chunk type: {type(result.result.data_array[0])}")
                             if len(result.result.data_array[0]) > 0:
                                 print(f"First row type: {type(result.result.data_array[0][0])}")
@@ -354,43 +359,49 @@ class DatabricksQueryTool(BaseTool):
                     # IMPROVED DETECTION LOGIC: Check if we're possibly dealing with rows where each item
                     # contains a single value or character (which could indicate incorrect row structure)
                     is_likely_incorrect_row_structure = False
-                    sample_size = min(20, len(result.result.data_array[0]))
 
-                    if sample_size > 0:
-                        single_char_count = 0
-                        single_digit_count = 0
-                        total_items = 0
+                    # Only try to analyze sample if data_array exists and has content
+                    if hasattr(result.result, 'data_array') and result.result.data_array and len(result.result.data_array) > 0 and len(result.result.data_array[0]) > 0:
+                        sample_size = min(20, len(result.result.data_array[0]))
 
-                        for i in range(sample_size):
-                            val = result.result.data_array[0][i]
-                            total_items += 1
-                            if isinstance(val, str) and len(val) == 1 and not val.isdigit():
-                                single_char_count += 1
-                            elif isinstance(val, str) and len(val) == 1 and val.isdigit():
-                                single_digit_count += 1
+                        if sample_size > 0:
+                            single_char_count = 0
+                            single_digit_count = 0
+                            total_items = 0
 
-                        # If a significant portion of the first values are single characters or digits,
-                        # this likely indicates data is being incorrectly structured
-                        if total_items > 0 and (single_char_count + single_digit_count) / total_items > 0.5:
-                            print(f"Detected potential incorrect row structure: {single_char_count} single chars, {single_digit_count} digits out of {total_items} total items")
-                            is_likely_incorrect_row_structure = True
+                            for i in range(sample_size):
+                                val = result.result.data_array[0][i]
+                                total_items += 1
+                                if isinstance(val, str) and len(val) == 1 and not val.isdigit():
+                                    single_char_count += 1
+                                elif isinstance(val, str) and len(val) == 1 and val.isdigit():
+                                    single_digit_count += 1
+
+                            # If a significant portion of the first values are single characters or digits,
+                            # this likely indicates data is being incorrectly structured
+                            if total_items > 0 and (single_char_count + single_digit_count) / total_items > 0.5:
+                                print(f"Detected potential incorrect row structure: {single_char_count} single chars, {single_digit_count} digits out of {total_items} total items")
+                                is_likely_incorrect_row_structure = True
 
                     # Additional check: if many rows have just 1 item when we expect multiple columns
-                    rows_with_single_item = sum(1 for row in result.result.data_array[:sample_size] if isinstance(row, list) and len(row) == 1)
-                    if rows_with_single_item > sample_size * 0.5 and len(columns) > 1:
-                        print(f"Many rows ({rows_with_single_item}/{sample_size}) have only a single value when expecting {len(columns)} columns")
-                        is_likely_incorrect_row_structure = True
+                    rows_with_single_item = 0
+                    if hasattr(result.result, 'data_array') and result.result.data_array and len(result.result.data_array) > 0:
+                        sample_size_for_rows = min(sample_size, len(result.result.data_array[0])) if 'sample_size' in locals() else min(20, len(result.result.data_array[0]))
+                        rows_with_single_item = sum(1 for row in result.result.data_array[0][:sample_size_for_rows] if isinstance(row, list) and len(row) == 1)
+                        if rows_with_single_item > sample_size_for_rows * 0.5 and len(columns) > 1:
+                            print(f"Many rows ({rows_with_single_item}/{sample_size_for_rows}) have only a single value when expecting {len(columns)} columns")
+                            is_likely_incorrect_row_structure = True
 
                     # Check if we're getting primarily single characters or the data structure seems off,
                     # we should use special handling
-                    if is_likely_incorrect_row_structure:
+                    if 'is_likely_incorrect_row_structure' in locals() and is_likely_incorrect_row_structure:
                         print("Data appears to be malformed - will use special row reconstruction")
                         needs_special_string_handling = True
                     else:
                         needs_special_string_handling = False
 
                     # Process results differently based on detection
-                    if needs_special_string_handling:
+                    if 'needs_special_string_handling' in locals() and needs_special_string_handling:
                         # We're dealing with data where the rows may be incorrectly structured
                         print("Using row reconstruction processing mode")