# Files
# data-pipeline/dvc.lock
# 2026-03-14 21:46:31 +08:00
#
# 68 lines
# 1.6 KiB
# Plaintext

# DVC lock file: for each pipeline stage it records the command, the hashes
# and sizes of its dependencies and outputs, and the parameter values read
# from params.yaml.
# NOTE(review): lock files like this are normally regenerated by the tool
# rather than edited by hand — confirm before changing any hash or size.
schema: '2.0'
stages:
  # Output data/prepared is consumed by the featurize stage (same md5).
  prepare:
    cmd: python src/prepare.py data/data.xml
    deps:
    - path: data/data.xml
      hash: md5
      md5: 22a1a2931c8370d3aeedd7183606fd7f
      size: 14445097
    - path: src/prepare.py
      hash: md5
      md5: f54d670ac8a4f63206781fc31d1f2651
      size: 2231
    params:
      params.yaml:
        prepare.seed: 20170428
        prepare.split: 0.2
    outs:
    - path: data/prepared
      hash: md5
      md5: 153aad06d376b6595932470e459ef42a.dir
      size: 8437363
      nfiles: 2
  # Output data/features is consumed by the train stage (same md5).
  featurize:
    cmd: python src/featurization.py data/prepared data/features
    deps:
    - path: data/prepared
      hash: md5
      md5: 153aad06d376b6595932470e459ef42a.dir
      size: 8437363
      nfiles: 2
    - path: src/featurization.py
      hash: md5
      md5: 29660042a8c24019fa7392f2e1a735b9
      size: 4175
    params:
      params.yaml:
        featurize.max_features: 100
        featurize.ngrams: 1
    outs:
    - path: data/features
      hash: md5
      md5: 74642e90419272839886d8e51f730b44.dir
      size: 1556292
      nfiles: 2
  # Final stage: writes model.pkl from data/features.
  train:
    cmd: python src/train.py data/features model.pkl
    deps:
    - path: data/features
      hash: md5
      md5: 74642e90419272839886d8e51f730b44.dir
      size: 1556292
      nfiles: 2
    - path: src/train.py
      hash: md5
      md5: 324001573ed724e5ae092226fcf9ca30
      size: 1666
    params:
      params.yaml:
        train.min_split: 0.01
        train.n_est: 50
        train.seed: 20170428
    outs:
    - path: model.pkl
      hash: md5
      md5: 67e469e0d6578012431be0cd8db6325c
      size: 1855076