privkit.datasets package#

class privkit.datasets.CabspottingDataset#

Bases: Dataset

Class to handle the Cabspotting dataset.

DATASET_ID = 'cabspotting'#

DATASET_INFO = 'Cabspotting dataset is a dataset of taxi trajectories over the city of San Francisco, California, USA. The trajectories belong to 536 taxis and were collected over a period of 30 days, containing not only the GPS position and timestamp, but also whether the cab had a customer at each time.'#

DATASET_NAME = 'Cabspotting Dataset'#

DATA_TYPE_ID = ['location_data']#

download(url: str, savepath: str = './input/datasets/')#

Downloads the Cabspotting dataset from IEEE Data Port if the authorized URL is provided. To proceed with the download through Privkit, the user is required to log in to the website https://ieee-dataport.org/open-access/crawdad-epflmobility, click on the “cabspottingdata.tar.gz” button and copy the URL.

Parameters:

url (str) – URL that results from clicking on the “cabspottingdata.tar.gz” button after logging in to the website https://ieee-dataport.org/open-access/crawdad-epflmobility.
savepath (str) – path where dataset should be saved. The default is constants.datasets_folder.

filter_by_occupancy(occupancy: int = 1)#

Filters trajectories by occupancy.

Parameters:: occupancy (int) – 1 is occupied and 0 is free. The default value is 1, that is, it filters trajectories with occupancy.

load_dataset()#: Loads cabspotting dataset

load_from_files()#

process_dataset()#: Performs dataset processing or returns dataset processing methods. This is specific to the dataset

save_dataset()#: Saves dataset

class privkit.datasets.Dataset#

Bases: ABC

Abstract class for a generic dataset. Defines a series of functions common to process different datasets. Provides basic functions to load, process, and save data. Requires the definition of a DATASET_ID, DATASET_NAME, DATASET_INFO, and DATA_TYPE_ID.

property DATASET_ID: str#: Identifier of the dataset

property DATASET_INFO: str#: Information of the dataset, specifying the reference for the dataset (if it exists)

property DATASET_NAME: str#: Name of the dataset

property DATA_TYPE_ID: List[str]#: Identifier of the data type of this dataset

abstract load_dataset(*args)#: Loads dataset. This is specific to the dataset

abstract process_dataset(*args)#: Performs dataset processing or returns dataset processing methods. This is specific to the dataset

abstract save_dataset(*args)#: Saves the dataset to a file. This is specific to the dataset

class privkit.datasets.GeolifeDataset#

Bases: Dataset

Class to handle the Geolife dataset.

DATASET_ID = 'geolife'#

DATASET_INFO = 'Geolife dataset is a real mobility dataset collected over a period of more than three years from GPS devices. The dataset contains data from 182 users, 17621 trajectories and roughly 25 million reports.'#

DATASET_NAME = 'Geolife Dataset'#

DATA_TYPE_ID = ['location_data']#

static apply_labels(trajectory_df: pandas.DataFrame, labels_df: pandas.DataFrame)#

Applies the labels from the loaded file to the trajectories dataframe

Parameters:

trajectory_df (pd.DataFrame) – trajectory dataframe previously loaded from dataset files
labels_df (pd.DataFrame) – labels data previously loaded from dataset files

download(savepath: str = './input/datasets/')#

Downloads Geolife dataset.

Parameters:: savepath (str) – path where dataset should be saved. The default is constants.datasets_folder.

load_dataset()#: Loads Geolife dataset

static load_labels(labels_file: str) → pandas.DataFrame#

Loads the labels from a given file

Parameters:: labels_file (str) – filename of the file that should be loaded
Returns:: returns the loaded file as a Pandas Dataframe

static load_plt(plt_file: str, trajectory_id: int) → pandas.DataFrame#

Loads the trajectory data given the filename

Parameters:

plt_file (str) – filename of the file that should be loaded
trajectory_id (int) – corresponding trajectory number of the loading file

Returns:

returns the loaded file as a Pandas Dataframe

process_dataset()#: Performs dataset processing or returns dataset processing methods. This is specific to the dataset

save_dataset()#: Saves dataset

class privkit.datasets.SketchfabDataset(api_token: str)#

Bases: Dataset

Class to download facial models from the Sketchfab dataset.

DATASET_ID = 'sketchfab'#

DATASET_INFO = 'Sketchfab is a 3D asset website used to publish, share, discover, buy and sell 3D, VR and AR content. The Sketchfab dataset provided in Privkit allows downloading models from Sketchfab, and also provides some models as examples.'#

DATASET_NAME = 'Sketchfab Dataset'#

DATA_TYPE_ID = ['facial_data']#

api_token#: API token for the Sketchfab website

data#: Data is stored as a FacialData structure

download_model(uid: str, filename: str | None = None, savepath: str = './input/datasets/')#

Downloads the model from the Sketchfab website given the model UID.

Parameters:

uid (str) – model UID
filename (str) – file name where the model should be saved
savepath (str) – path where dataset should be saved. The default is constants.datasets_folder.

load_dataset(model_id: int = 1)#: Loads one of the available example facial models.

process_dataset()#: Performs dataset processing or returns dataset processing methods. This is specific to the dataset

save_dataset(filename: str | None = None)#

Saves dataset

Parameters:: filename (str) – file name where the model should be saved