Data set for supervised learning. More...
#include <shark/Data/Dataset.h>
Public Types | |
typedef InputT | InputType |
typedef LabelT | LabelType |
typedef UnlabeledData< InputT > | InputContainer |
typedef Data< LabelT > | LabelContainer |
typedef InputContainer::IndexSet | IndexSet |
typedef InputLabelBatch< typename Batch< InputType >::type &, typename Batch< LabelType >::type & > | batch_reference |
typedef InputLabelBatch< typename Batch< InputType >::type const &, typename Batch< LabelType >::type const & > | const_batch_reference |
typedef batch_reference::reference | element_reference |
typedef const_batch_reference::const_reference | const_element_reference |
typedef boost::iterator_range< detail::DataElementIterator< LabeledData< InputType, LabelType > > > | element_range |
typedef boost::iterator_range< detail::DataElementIterator< LabeledData< InputType, LabelType > const > > | const_element_range |
typedef detail::BatchRange< LabeledData< InputType, LabelType > > | batch_range |
typedef detail::BatchRange< LabeledData< InputType, LabelType > const > | const_batch_range |
Public Member Functions | |
const_element_range | elements () const |
Returns the range of elements. | |
element_range | elements () |
Returns the range of elements. | |
const_batch_range | batches () const |
Returns the range of batches. | |
batch_range | batches () |
Returns the range of batches. | |
std::size_t | numberOfBatches () const |
Returns the number of batches of the set. | |
std::size_t | numberOfElements () const |
Returns the total number of elements. | |
bool | empty () const |
Check whether the set is empty. | |
InputContainer const & | inputs () const |
Access to inputs as a separate container. | |
InputContainer & | inputs () |
Access to inputs as a separate container. | |
LabelContainer const & | labels () const |
Access to labels as a separate container. | |
LabelContainer & | labels () |
Access to labels as a separate container. | |
LabeledData () | |
Empty data set. | |
LabeledData (std::size_t numBatches) | |
Create an empty set with just the correct number of batches. | |
LabeledData (std::size_t size, element_type const &element, std::size_t batchSize=DefaultBatchSize) | |
LabeledData (Data< InputType > const &inputs, Data< LabelType > const &labels) | |
Construction from data. | |
element_reference | element (std::size_t i) |
const_element_reference | element (std::size_t i) const |
batch_reference | batch (std::size_t i) |
const_batch_reference | batch (std::size_t i) const |
Shape const & | inputShape () const |
Returns the Shape of the inputs. | |
Shape & | inputShape () |
Returns the Shape of the inputs. | |
Shape const & | labelShape () const |
Returns the Shape of the labels. | |
Shape & | labelShape () |
Returns the Shape of the labels. | |
void | read (InArchive &archive) |
from ISerializable | |
void | write (OutArchive &archive) const |
from ISerializable | |
virtual void | makeIndependent () |
This method makes the vector independent of all siblings and parents. | |
void | splitBatch (std::size_t batch, std::size_t elementIndex) |
LabeledData | splice (std::size_t batch) |
Splits the container into two independent parts. The left part remains in the container; the right part is returned. | |
void | append (LabeledData const &other) |
Appends the contents of another data object to the end. | |
void | push_back (typename Batch< InputType >::type const &inputs, typename Batch< LabelType >::type const &labels) |
void | push_back (const_batch_reference batch) |
template<class Range > | |
void | repartition (Range const &batchSizes) |
Reorders the batch structure in the container to that indicated by the batchSizes vector. | |
std::vector< std::size_t > | getPartitioning () const |
Creates a vector with the batch sizes of every batch. | |
template<class Range > | |
void | reorderElements (Range const &indices) |
void | shuffle () |
Shuffles all elements in the entire dataset (that is, also across the batches). | |
LabeledData | indexedSubset (IndexSet const &indices) const |
Fill in the subset defined by the list of indices. | |
Public Member Functions inherited from shark::ISerializable | |
virtual | ~ISerializable () |
Virtual d'tor. | |
void | load (InArchive &archive, unsigned int version) |
Versioned loading of components, calls read(...). | |
void | save (OutArchive &archive, unsigned int version) const |
Versioned storing of components, calls write(...). | |
BOOST_SERIALIZATION_SPLIT_MEMBER () | |
Static Public Attributes | |
static const std::size_t | DefaultBatchSize = InputContainer::DefaultBatchSize |
Protected Attributes | |
InputContainer | m_data |
LabelContainer | m_label |
point data | |
Friends | |
void | swap (LabeledData &a, LabeledData &b) |
Data set for supervised learning.
The LabeledData class extends UnlabeledData for the representation of inputs. In addition it holds and provides access to the corresponding labels.
LabeledData tries to mimic the underlying data as pairs of input and label data. This means that when accessing a batch by calling batch(i) or by using one of the iterators, one accesses the input batch via batch(i).input and the labels via batch(i).label.
This also holds true for single-element access using operator(). Be aware that direct access to an element is a linear-time operation, so it is not advisable to iterate over the elements; iterate over the batches instead.