Compare commits

...

2 Commits

Author SHA1 Message Date
0f4e426aa2 first pipeline repro 2026-03-14 21:46:31 +08:00
620d1dfb2d pipeline defined 2026-03-14 21:44:33 +08:00
4 changed files with 102 additions and 0 deletions

1
.gitignore vendored
View File

@@ -1 +1,2 @@
# Local environment directory kept out of Git
# (presumably a Python virtualenv; confirm).
.venv
# Output of the `train` stage declared in dvc.yaml; ignored by Git.
/model.pkl

2
data/.gitignore vendored
View File

@@ -1 +1,3 @@
# Patterns are relative to data/; the leading slash anchors each one
# to this directory.
# Input file of the `prepare` stage (data/data.xml in dvc.yaml).
/data.xml
# Output directory of the `prepare` stage (data/prepared).
/prepared
# Output directory of the `featurize` stage (data/features).
/features

67
dvc.lock Normal file
View File

@@ -0,0 +1,67 @@
# Lock file for the pipeline defined in dvc.yaml. For every stage it
# records the command, the md5/size of each dependency and output, and
# the parameter values (read from params.yaml) in effect for that run.
# This file is normally rewritten by DVC (e.g. `dvc repro`); avoid
# editing it by hand.
schema: '2.0'
stages:
  # prepare: data/data.xml -> data/prepared
  prepare:
    cmd: python src/prepare.py data/data.xml
    deps:
    - path: data/data.xml
      hash: md5
      md5: 22a1a2931c8370d3aeedd7183606fd7f
      size: 14445097
    - path: src/prepare.py
      hash: md5
      md5: f54d670ac8a4f63206781fc31d1f2651
      size: 2231
    params:
      params.yaml:
        prepare.seed: 20170428
        prepare.split: 0.2
    outs:
    # A `.dir` suffix on the md5 marks a directory entry; `nfiles` is
    # the recorded number of files for it.
    - path: data/prepared
      hash: md5
      md5: 153aad06d376b6595932470e459ef42a.dir
      size: 8437363
      nfiles: 2
  # featurize: data/prepared -> data/features
  featurize:
    cmd: python src/featurization.py data/prepared data/features
    deps:
    # Same hash as the `prepare` output above: this stage consumes it.
    - path: data/prepared
      hash: md5
      md5: 153aad06d376b6595932470e459ef42a.dir
      size: 8437363
      nfiles: 2
    - path: src/featurization.py
      hash: md5
      md5: 29660042a8c24019fa7392f2e1a735b9
      size: 4175
    params:
      params.yaml:
        featurize.max_features: 100
        featurize.ngrams: 1
    outs:
    - path: data/features
      hash: md5
      md5: 74642e90419272839886d8e51f730b44.dir
      size: 1556292
      nfiles: 2
  # train: data/features -> model.pkl
  train:
    cmd: python src/train.py data/features model.pkl
    deps:
    # Same hash as the `featurize` output above: this stage consumes it.
    - path: data/features
      hash: md5
      md5: 74642e90419272839886d8e51f730b44.dir
      size: 1556292
      nfiles: 2
    - path: src/train.py
      hash: md5
      md5: 324001573ed724e5ae092226fcf9ca30
      size: 1666
    params:
      params.yaml:
        train.min_split: 0.01
        train.n_est: 50
        train.seed: 20170428
    outs:
    - path: model.pkl
      hash: md5
      md5: 67e469e0d6578012431be0cd8db6325c
      size: 1855076

32
dvc.yaml Normal file
View File

@@ -0,0 +1,32 @@
# DVC pipeline definition: three stages chained through their outputs
# (prepare -> featurize -> train). For each stage:
#   cmd    - command line executed for the stage
#   deps   - files/directories the stage depends on
#   params - parameter keys tracked for the stage (values live in
#            params.yaml)
#   outs   - files/directories the stage produces
stages:
  # Runs src/prepare.py on data/data.xml and produces data/prepared.
  prepare:
    cmd: python src/prepare.py data/data.xml
    deps:
    - data/data.xml
    - src/prepare.py
    params:
    - prepare.seed
    - prepare.split
    outs:
    - data/prepared

  # Runs src/featurization.py on data/prepared (the `prepare` output)
  # and produces data/features.
  featurize:
    cmd: python src/featurization.py data/prepared data/features
    deps:
    - data/prepared
    - src/featurization.py
    params:
    - featurize.max_features
    - featurize.ngrams
    outs:
    - data/features

  # Runs src/train.py on data/features (the `featurize` output) and
  # produces model.pkl.
  train:
    cmd: python src/train.py data/features model.pkl
    deps:
    - data/features
    - src/train.py
    params:
    - train.min_split
    - train.n_est
    - train.seed
    outs:
    - model.pkl