datasets
- class marius.tools.preprocess.dataset.Dataset(output_directory, spark=False)
Abstract dataset class
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- node_type_file: pathlib.Path
- dataset_name: str
- dataset_url: str
- __init__(output_directory, spark=False)
- output_directory: pathlib.Path
- spark: bool
- edge_list_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_features_file: pathlib.Path
- abstract download(overwrite=False)
- abstract preprocess()
- Return type
- class marius.tools.preprocess.datasets.fb15k.FB15K(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- __init__(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- dataset_name: str
- dataset_url: str
- download(overwrite=False)
- preprocess(num_partitions=1, remap_ids=True, splits=None, sequential_train_nodes=False, partitioned_eval=False)
- edge_list_file: pathlib.Path
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- relation_features_file: pathlib.Path
- node_type_file: pathlib.Path
- output_directory: pathlib.Path
- spark: bool
- class marius.tools.preprocess.datasets.fb15k_237.FB15K237(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- __init__(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- dataset_name: str
- dataset_url: str
- download(overwrite=False)
- preprocess(num_partitions=1, remap_ids=True, splits=None, sequential_train_nodes=False, partitioned_eval=False)
- edge_list_file: pathlib.Path
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- relation_features_file: pathlib.Path
- node_type_file: pathlib.Path
- output_directory: pathlib.Path
- spark: bool
- class marius.tools.preprocess.datasets.freebase86m.Freebase86m(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- __init__(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- dataset_name: str
- dataset_url: str
- download(overwrite=False)
- preprocess(num_partitions=1, remap_ids=True, splits=None, sequential_train_nodes=False, partitioned_eval=False)
- edge_list_file: pathlib.Path
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- relation_features_file: pathlib.Path
- node_type_file: pathlib.Path
- output_directory: pathlib.Path
- spark: bool
- class marius.tools.preprocess.datasets.friendster.Friendster(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- __init__(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- dataset_name: str
- dataset_url: str
- download(overwrite=False)
- preprocess(num_partitions=1, remap_ids=True, splits=None, sequential_train_nodes=False, generate_random_features=False, node_feature_dim=32, num_classes=50, node_splits=[0.1, 0.05, 0.05], partitioned_eval=False)
- edge_list_file: pathlib.Path
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- relation_features_file: pathlib.Path
- node_type_file: pathlib.Path
- output_directory: pathlib.Path
- spark: bool
- class marius.tools.preprocess.datasets.livejournal.Livejournal(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- __init__(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- dataset_name: str
- dataset_url: str
- download(overwrite=False)
- preprocess(num_partitions=1, remap_ids=True, splits=[0.9, 0.05, 0.05], sequential_train_nodes=False, partitioned_eval=False)
- edge_list_file: pathlib.Path
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- relation_features_file: pathlib.Path
- node_type_file: pathlib.Path
- output_directory: pathlib.Path
- spark: bool
- class marius.tools.preprocess.datasets.ogb_mag240m.OGBMag240M(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- __init__(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- dataset_name: str
- dataset_url: str
- download(overwrite=False)
- preprocess(num_partitions=1, remap_ids=True, splits=None, sequential_train_nodes=False, partitioned_eval=False)
- edge_list_file: pathlib.Path
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- relation_features_file: pathlib.Path
- node_type_file: pathlib.Path
- output_directory: pathlib.Path
- spark: bool
- class marius.tools.preprocess.datasets.ogb_wikikg90mv2.OGBWikiKG90Mv2(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- __init__(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- dataset_name: str
- dataset_url: str
- download(overwrite=False)
- preprocess(num_partitions=1, remap_ids=True, splits=None, sequential_train_nodes=False, partitioned_eval=False)
- edge_list_file: pathlib.Path
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- relation_features_file: pathlib.Path
- node_type_file: pathlib.Path
- output_directory: pathlib.Path
- spark: bool
- class marius.tools.preprocess.datasets.ogbl_citation2.OGBLCitation2(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- __init__(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- dataset_name: str
- dataset_url: str
- download(overwrite=False)
- preprocess(num_partitions=1, remap_ids=True, splits=None, sequential_train_nodes=False, partitioned_eval=False)
- edge_list_file: pathlib.Path
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- relation_features_file: pathlib.Path
- node_type_file: pathlib.Path
- output_directory: pathlib.Path
- spark: bool
- class marius.tools.preprocess.datasets.ogbl_ppa.OGBLPpa(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- __init__(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- dataset_name: str
- dataset_url: str
- download(overwrite=False, remap_ids=True)
- preprocess(num_partitions=1, remap_ids=True, splits=None, sequential_train_nodes=False, partitioned_eval=False)
- edge_list_file: pathlib.Path
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- relation_features_file: pathlib.Path
- node_type_file: pathlib.Path
- output_directory: pathlib.Path
- spark: bool
- class marius.tools.preprocess.datasets.ogbl_wikikg2.OGBLWikiKG2(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- __init__(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- dataset_name: str
- dataset_url: str
- download(overwrite=False)
- preprocess(num_partitions=1, remap_ids=True, splits=None, sequential_train_nodes=False, partitioned_eval=False)
- edge_list_file: pathlib.Path
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- relation_features_file: pathlib.Path
- node_type_file: pathlib.Path
- output_directory: pathlib.Path
- spark: bool
- class marius.tools.preprocess.datasets.ogbn_arxiv.OGBNArxiv(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- __init__(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- dataset_name: str
- dataset_url: str
- download(overwrite=False)
- preprocess(num_partitions=1, remap_ids=True, splits=None, sequential_train_nodes=False, partitioned_eval=False)
- edge_list_file: pathlib.Path
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- relation_features_file: pathlib.Path
- node_type_file: pathlib.Path
- output_directory: pathlib.Path
- spark: bool
- class marius.tools.preprocess.datasets.ogbn_papers100m.OGBNPapers100M(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- __init__(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- dataset_name: str
- dataset_url: str
- download(overwrite=False)
- preprocess(num_partitions=1, remap_ids=True, splits=None, sequential_train_nodes=False, partitioned_eval=False)
- edge_list_file: pathlib.Path
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- relation_features_file: pathlib.Path
- node_type_file: pathlib.Path
- output_directory: pathlib.Path
- spark: bool
- class marius.tools.preprocess.datasets.ogbn_products.OGBNProducts(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- __init__(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- dataset_name: str
- dataset_url: str
- download(overwrite=False)
- preprocess(num_partitions=1, remap_ids=True, splits=None, sequential_train_nodes=False, partitioned_eval=False)
- edge_list_file: pathlib.Path
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- relation_features_file: pathlib.Path
- node_type_file: pathlib.Path
- output_directory: pathlib.Path
- spark: bool
- class marius.tools.preprocess.datasets.twitter.Twitter(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- __init__(output_directory, spark=False)
- Parameters
output_directory (pathlib.Path)
spark (bool) – defaults to False
- dataset_name: str
- dataset_url: str
- download(overwrite=False)
- preprocess(num_partitions=1, remap_ids=True, splits=[0.9, 0.05, 0.05], sequential_train_nodes=False)
- edge_list_file: pathlib.Path
- edge_features_file: pathlib.Path
- node_mapping_file: pathlib.Path
- node_features_file: pathlib.Path
- relation_mapping_file: pathlib.Path
- relation_features_file: pathlib.Path
- node_type_file: pathlib.Path
- output_directory: pathlib.Path
- spark: bool