Datasets Size Comparison#

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from deepchecks.tabular import Dataset
from deepchecks.tabular.checks import DatasetsSizeComparison
df = pd.DataFrame(np.random.randn(1000, 3), columns=['x1', 'x2', 'x3'])
df['label'] = df['x2'] + 0.1 * df['x1']

train, test = train_test_split(df, test_size=0.4)
train = Dataset(train, features=['x1', 'x2', 'x3'], label='label')
test = Dataset(test, features=['x1', 'x2', 'x3'], label='label')

check_instance = (
Datasets Size Comparison

Total running time of the script: (0 minutes 0.022 seconds)

Gallery generated by Sphinx-Gallery