taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git
Log | Files | Refs | README

commit 12fbc9b96ea1ef2727c87c02fe8d2305235b4d15
parent 43e106e6630030dd34813295fe1d07bb86025402
Author: Étienne Simon <esimon@esimon.eu>
Date:   Wed, 29 Apr 2015 19:42:47 -0400

Change origin_call ids

Diffstat:
M convert_data.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/convert_data.py b/convert_data.py
@@ -104,8 +104,9 @@ def convert(input_directory, save_path):
     h5file = h5py.File(save_path, 'w')
     split = {}
     split.update(read_stands(input_directory, h5file))
-    split.update(read_taxis(input_directory, h5file, 'test', 'test_'))
     split.update(read_taxis(input_directory, h5file, 'train', ''))
+    print 'First origin_call not present in training set: ', len(origin_call_dict)
+    split.update(read_taxis(input_directory, h5file, 'test', 'test_'))
     split.update(unique(h5file))
     h5file.attrs['split'] = H5PYDataset.create_split_array(split)
     h5file.flush()