pipeline defined
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1 +1,2 @@
.venv
/model.pkl
2
data/.gitignore
vendored
2
data/.gitignore
vendored
@@ -1 +1,3 @@
/data.xml
/prepared
/features
23
dvc.lock
Normal file
23
dvc.lock
Normal file
@@ -0,0 +1,23 @@
schema: '2.0'
stages:
  prepare:
    cmd: python src/prepare.py data/data.xml
    deps:
    - path: data/data.xml
      hash: md5
      md5: 22a1a2931c8370d3aeedd7183606fd7f
      size: 14445097
    - path: src/prepare.py
      hash: md5
      md5: f54d670ac8a4f63206781fc31d1f2651
      size: 2231
    params:
      params.yaml:
        prepare.seed: 20170428
        prepare.split: 0.2
    outs:
    - path: data/prepared
      hash: md5
      md5: 153aad06d376b6595932470e459ef42a.dir
      size: 8437363
      nfiles: 2
32
dvc.yaml
Normal file
32
dvc.yaml
Normal file
@@ -0,0 +1,32 @@
stages:
  prepare:
    cmd: python src/prepare.py data/data.xml
    deps:
    - data/data.xml
    - src/prepare.py
    params:
    - prepare.seed
    - prepare.split
    outs:
    - data/prepared
  featurize:
    cmd: python src/featurization.py data/prepared data/features
    deps:
    - data/prepared
    - src/featurization.py
    params:
    - featurize.max_features
    - featurize.ngrams
    outs:
    - data/features
  train:
    cmd: python src/train.py data/features model.pkl
    deps:
    - data/features
    - src/train.py
    params:
    - train.min_split
    - train.n_est
    - train.seed
    outs:
    - model.pkl
Reference in New Issue
Block a user