# Recorded state of a three-stage pipeline: prepare -> featurize -> train.
# Each stage lists its command, its dependencies and outputs (with md5 and
# size), and the parameter values read from params.yaml.
# NOTE(review): looks like a tool-generated DVC lock file - confirm before hand-editing.
schema: '2.0'
stages:
  prepare:
    cmd: python src/prepare.py data/data.xml
    deps:
    - path: data/data.xml
      hash: md5
      md5: 22a1a2931c8370d3aeedd7183606fd7f
      size: 14445097
    - path: src/prepare.py
      hash: md5
      md5: f54d670ac8a4f63206781fc31d1f2651
      size: 2231
    params:
      params.yaml:
        prepare.seed: 20170428
        prepare.split: 0.2
    outs:
    - path: data/prepared
      hash: md5
      md5: 153aad06d376b6595932470e459ef42a.dir
      size: 8437363
      nfiles: 2
  featurize:
    cmd: python src/featurization.py data/prepared data/features
    deps:
    - path: data/prepared
      hash: md5
      md5: 153aad06d376b6595932470e459ef42a.dir
      size: 8437363
      nfiles: 2
    - path: src/featurization.py
      hash: md5
      md5: 29660042a8c24019fa7392f2e1a735b9
      size: 4175
    params:
      params.yaml:
        featurize.max_features: 100
        featurize.ngrams: 1
    outs:
    - path: data/features
      hash: md5
      md5: 74642e90419272839886d8e51f730b44.dir
      size: 1556292
      nfiles: 2
  train:
    cmd: python src/train.py data/features model.pkl
    deps:
    - path: data/features
      hash: md5
      md5: 74642e90419272839886d8e51f730b44.dir
      size: 1556292
      nfiles: 2
    - path: src/train.py
      hash: md5
      md5: 324001573ed724e5ae092226fcf9ca30
      size: 1666
    params:
      params.yaml:
        train.min_split: 0.01
        train.n_est: 50
        train.seed: 20170428
    outs:
    - path: model.pkl
      hash: md5
      md5: 67e469e0d6578012431be0cd8db6325c
      size: 1855076