first pipeline repro

This commit is contained in:
2026-03-14 21:46:31 +08:00
parent 620d1dfb2d
commit 0f4e426aa2

View File

@@ -21,3 +21,47 @@ stages:
md5: 153aad06d376b6595932470e459ef42a.dir md5: 153aad06d376b6595932470e459ef42a.dir
size: 8437363 size: 8437363
nfiles: 2 nfiles: 2
featurize:
cmd: python src/featurization.py data/prepared data/features
deps:
- path: data/prepared
hash: md5
md5: 153aad06d376b6595932470e459ef42a.dir
size: 8437363
nfiles: 2
- path: src/featurization.py
hash: md5
md5: 29660042a8c24019fa7392f2e1a735b9
size: 4175
params:
params.yaml:
featurize.max_features: 100
featurize.ngrams: 1
outs:
- path: data/features
hash: md5
md5: 74642e90419272839886d8e51f730b44.dir
size: 1556292
nfiles: 2
train:
cmd: python src/train.py data/features model.pkl
deps:
- path: data/features
hash: md5
md5: 74642e90419272839886d8e51f730b44.dir
size: 1556292
nfiles: 2
- path: src/train.py
hash: md5
md5: 324001573ed724e5ae092226fcf9ca30
size: 1666
params:
params.yaml:
train.min_split: 0.01
train.n_est: 50
train.seed: 20170428
outs:
- path: model.pkl
hash: md5
md5: 67e469e0d6578012431be0cd8db6325c
size: 1855076