diff --git a/scripts/data/load_toronto_data.py b/scripts/data/load_toronto_data.py index 6a29459..dc625be 100644 --- a/scripts/data/load_toronto_data.py +++ b/scripts/data/load_toronto_data.py @@ -288,6 +288,10 @@ class DataPipeline: logger.info("Running dbt transformations...") dbt_project_dir = PROJECT_ROOT / "dbt" + venv_dbt = PROJECT_ROOT / ".venv" / "bin" / "dbt" + + # Use venv dbt if available, otherwise fall back to system dbt + dbt_cmd = str(venv_dbt) if venv_dbt.exists() else "dbt" if not dbt_project_dir.exists(): logger.error(f"dbt project directory not found: {dbt_project_dir}") @@ -302,16 +306,14 @@ class DataPipeline: # Run dbt models logger.info(" Running dbt run...") result = subprocess.run( - ["dbt", "run", "--profiles-dir", str(dbt_project_dir)], + [dbt_cmd, "run", "--profiles-dir", str(dbt_project_dir)], cwd=dbt_project_dir, capture_output=True, text=True, ) if result.returncode != 0: - logger.error(f"dbt run failed:\n{result.stderr}") - if self.verbose: - logger.debug(f"dbt output:\n{result.stdout}") + logger.error(f"dbt run failed:\n{result.stdout}\n{result.stderr}") return False logger.info(" dbt run completed successfully") @@ -319,14 +321,16 @@ class DataPipeline: # Run dbt tests logger.info(" Running dbt test...") result = subprocess.run( - ["dbt", "test", "--profiles-dir", str(dbt_project_dir)], + [dbt_cmd, "test", "--profiles-dir", str(dbt_project_dir)], cwd=dbt_project_dir, capture_output=True, text=True, ) if result.returncode != 0: - logger.warning(f"dbt test had failures:\n{result.stderr}") + logger.warning( + f"dbt test had failures:\n{result.stdout}\n{result.stderr}" + ) # Don't fail on test failures, just warn else: logger.info(" dbt test completed successfully")