pipeline defined

This commit is contained in:
2026-03-14 21:44:33 +08:00
parent b77c37cfa5
commit 620d1dfb2d
4 changed files with 58 additions and 0 deletions

1
.gitignore vendored
View File

@@ -1 +1,2 @@
# Local environment directory (presumably a Python virtualenv — confirm).
.venv
# Model artifact declared as the output of the `train` stage in dvc.yaml;
# kept out of Git.
/model.pkl

2
data/.gitignore vendored
View File

@@ -1 +1,3 @@
# Patterns are anchored to the data/ directory that holds this file.
# data.xml is the input dependency of the `prepare` stage in dvc.yaml
# (presumably tracked by DVC rather than Git — confirm).
/data.xml
# Directories declared as stage outputs in dvc.yaml
# (data/prepared from `prepare`, data/features from `featurize`).
/prepared
/features

23
dvc.lock Normal file
View File

@@ -0,0 +1,23 @@
# Recorded state of the pipeline stages declared in dvc.yaml.
# Only the `prepare` stage has an entry here; `featurize` and `train`
# are declared in dvc.yaml but have no recorded state in this file.
# NOTE(review): this file is normally rewritten by DVC, so hand-written
# comments may not survive regeneration.
schema: '2.0'
stages:
  prepare:
    cmd: python src/prepare.py data/data.xml
    # Content hashes and sizes of the inputs the stage was run against.
    deps:
    - path: data/data.xml
      hash: md5
      md5: 22a1a2931c8370d3aeedd7183606fd7f
      size: 14445097
    - path: src/prepare.py
      hash: md5
      md5: f54d670ac8a4f63206781fc31d1f2651
      size: 2231
    # Parameter values captured per parameters file.
    params:
      params.yaml:
        prepare.seed: 20170428
        prepare.split: 0.2
    # The `.dir` suffix and `nfiles` mark a directory output (2 files).
    outs:
    - path: data/prepared
      hash: md5
      md5: 153aad06d376b6595932470e459ef42a.dir
      size: 8437363
      nfiles: 2

32
dvc.yaml Normal file
View File

@@ -0,0 +1,32 @@
# DVC pipeline with three stages: prepare -> featurize -> train.
# Each stage lists the previous stage's output directory among its deps,
# which is what links the stages into a chain.
# `params` entries name keys only; with no file given, DVC looks them up
# in its default parameters file (params.yaml).
stages:
  # Runs src/prepare.py on data/data.xml; declares data/prepared as output.
  prepare:
    cmd: python src/prepare.py data/data.xml
    deps:
    - data/data.xml
    - src/prepare.py
    params:
    - prepare.seed
    - prepare.split
    outs:
    - data/prepared

  # Runs src/featurization.py with data/prepared as input path and
  # data/features as output path.
  featurize:
    cmd: python src/featurization.py data/prepared data/features
    deps:
    - data/prepared
    - src/featurization.py
    params:
    - featurize.max_features
    - featurize.ngrams
    outs:
    - data/features

  # Runs src/train.py on data/features; declares model.pkl as output.
  train:
    cmd: python src/train.py data/features model.pkl
    deps:
    - data/features
    - src/train.py
    params:
    - train.min_split
    - train.n_est
    - train.seed
    outs:
    - model.pkl