Skip to main content

Crate dataprof

Crate dataprof 

Source

Re-exports§

pub use engines::columnar::ArrowProfiler;
pub use api::DataProfiler;
pub use api::quick_quality_check;
pub use api::stream_profile;
pub use core::batch::BatchConfig;
pub use core::batch::BatchProcessor;
pub use core::batch::BatchResult;
pub use core::batch::BatchSummary;
pub use core::errors::DataProfilerError;
pub use core::errors::ErrorSeverity;
pub use core::sampling::ChunkSize;
pub use core::sampling::SamplingStrategy;
pub use parsers::robust_csv::CsvDiagnostics;
pub use core::config::DataprofConfig;
pub use core::config::DataprofConfigBuilder;
pub use core::exit_codes;
pub use core::validation::InputValidator;
pub use core::validation::ValidationError;
pub use engines::streaming::ProgressInfo;
pub use engines::AdaptiveProfiler;
pub use engines::EnginePerformance;
pub use engines::ProcessingType;
pub use engines::DataFusionLoader;
pub use output::html::generate_html_report;
pub use types::ColumnProfile;
pub use types::ColumnStats;
pub use types::DataFrameLibrary;
pub use types::DataQualityMetrics;
pub use types::DataSource;
pub use types::DataType;
pub use types::FileFormat;
pub use types::OutputFormat;
pub use types::Pattern;
pub use types::QualityReport;
pub use types::QueryEngine;
pub use types::ScanInfo;
pub use parsers::csv::analyze_csv;
pub use parsers::csv::analyze_csv_fast;
pub use parsers::csv::analyze_csv_robust;
pub use parsers::csv::analyze_csv_with_sampling;
pub use parsers::csv::analyze_csv_with_verbosity;
pub use parsers::json::analyze_json;
pub use parsers::json::analyze_json_with_quality;
pub use parsers::parquet::ParquetConfig;
pub use parsers::parquet::analyze_parquet_with_config;
pub use parsers::parquet::analyze_parquet_with_quality;
pub use parsers::parquet::is_parquet_file;
pub use analysis::MetricsCalculator;
pub use analysis::analyze_column_fast;
pub use analysis::detect_patterns;
pub use analysis::infer_type;
pub use stats::calculate_numeric_stats;
pub use stats::calculate_text_stats;
pub use database::DatabaseConfig;
pub use database::DatabaseConnector;
pub use database::DatabaseCredentials;
pub use database::MySqlConnector;
pub use database::PostgresConnector;
pub use database::RetryConfig;
pub use database::SamplingConfig;
pub use database::SamplingStrategy as DbSamplingStrategy;
pub use database::SqliteConnector;
pub use database::SslConfig;
pub use database::analyze_database;
pub use database::create_connector;

Modules§

acceleration
analysis
api
core
database
Database connectivity module for DataProfiler
engines
output
parsers
python
serde_helpers
Custom serde serialization helpers for formatting numeric values with appropriate precision
stats
types

Macros§

process_rows_to_columns
Macro to process rows into a column-oriented HashMap. Used for single-query (non-streaming) profiling.
streaming_profile_loop
Macro to generate the streaming batch loop for profiling queries. Handles the common pattern while allowing database-specific pool types. Includes inline row processing to avoid complex generic trait bounds.

Functions§

check_memory_leaks
Global memory leak detection utility
get_memory_usage_stats
Get global memory usage statistics