From 0f4e426aa22f520b55203c166f3c5c032ec8b4b5 Mon Sep 17 00:00:00 2001 From: Cunliang Kong Date: Sat, 14 Mar 2026 21:46:31 +0800 Subject: [PATCH] first pipeline repro --- dvc.lock | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/dvc.lock b/dvc.lock index bb09739..f3abab4 100644 --- a/dvc.lock +++ b/dvc.lock @@ -21,3 +21,47 @@ stages: md5: 153aad06d376b6595932470e459ef42a.dir size: 8437363 nfiles: 2 + featurize: + cmd: python src/featurization.py data/prepared data/features + deps: + - path: data/prepared + hash: md5 + md5: 153aad06d376b6595932470e459ef42a.dir + size: 8437363 + nfiles: 2 + - path: src/featurization.py + hash: md5 + md5: 29660042a8c24019fa7392f2e1a735b9 + size: 4175 + params: + params.yaml: + featurize.max_features: 100 + featurize.ngrams: 1 + outs: + - path: data/features + hash: md5 + md5: 74642e90419272839886d8e51f730b44.dir + size: 1556292 + nfiles: 2 + train: + cmd: python src/train.py data/features model.pkl + deps: + - path: data/features + hash: md5 + md5: 74642e90419272839886d8e51f730b44.dir + size: 1556292 + nfiles: 2 + - path: src/train.py + hash: md5 + md5: 324001573ed724e5ae092226fcf9ca30 + size: 1666 + params: + params.yaml: + train.min_split: 0.01 + train.n_est: 50 + train.seed: 20170428 + outs: + - path: model.pkl + hash: md5 + md5: 67e469e0d6578012431be0cd8db6325c + size: 1855076