Compare commits
2 Commits
b77c37cfa5
...
0f4e426aa2
| Author | SHA1 | Date | |
|---|---|---|---|
| 0f4e426aa2 | |||
| 620d1dfb2d |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1 +1,2 @@
|
||||
.venv
|
||||
/model.pkl
|
||||
|
||||
2
data/.gitignore
vendored
2
data/.gitignore
vendored
@@ -1 +1,3 @@
|
||||
/data.xml
|
||||
/prepared
|
||||
/features
|
||||
|
||||
67
dvc.lock
Normal file
67
dvc.lock
Normal file
@@ -0,0 +1,67 @@
|
||||
schema: '2.0'
|
||||
stages:
|
||||
prepare:
|
||||
cmd: python src/prepare.py data/data.xml
|
||||
deps:
|
||||
- path: data/data.xml
|
||||
hash: md5
|
||||
md5: 22a1a2931c8370d3aeedd7183606fd7f
|
||||
size: 14445097
|
||||
- path: src/prepare.py
|
||||
hash: md5
|
||||
md5: f54d670ac8a4f63206781fc31d1f2651
|
||||
size: 2231
|
||||
params:
|
||||
params.yaml:
|
||||
prepare.seed: 20170428
|
||||
prepare.split: 0.2
|
||||
outs:
|
||||
- path: data/prepared
|
||||
hash: md5
|
||||
md5: 153aad06d376b6595932470e459ef42a.dir
|
||||
size: 8437363
|
||||
nfiles: 2
|
||||
featurize:
|
||||
cmd: python src/featurization.py data/prepared data/features
|
||||
deps:
|
||||
- path: data/prepared
|
||||
hash: md5
|
||||
md5: 153aad06d376b6595932470e459ef42a.dir
|
||||
size: 8437363
|
||||
nfiles: 2
|
||||
- path: src/featurization.py
|
||||
hash: md5
|
||||
md5: 29660042a8c24019fa7392f2e1a735b9
|
||||
size: 4175
|
||||
params:
|
||||
params.yaml:
|
||||
featurize.max_features: 100
|
||||
featurize.ngrams: 1
|
||||
outs:
|
||||
- path: data/features
|
||||
hash: md5
|
||||
md5: 74642e90419272839886d8e51f730b44.dir
|
||||
size: 1556292
|
||||
nfiles: 2
|
||||
train:
|
||||
cmd: python src/train.py data/features model.pkl
|
||||
deps:
|
||||
- path: data/features
|
||||
hash: md5
|
||||
md5: 74642e90419272839886d8e51f730b44.dir
|
||||
size: 1556292
|
||||
nfiles: 2
|
||||
- path: src/train.py
|
||||
hash: md5
|
||||
md5: 324001573ed724e5ae092226fcf9ca30
|
||||
size: 1666
|
||||
params:
|
||||
params.yaml:
|
||||
train.min_split: 0.01
|
||||
train.n_est: 50
|
||||
train.seed: 20170428
|
||||
outs:
|
||||
- path: model.pkl
|
||||
hash: md5
|
||||
md5: 67e469e0d6578012431be0cd8db6325c
|
||||
size: 1855076
|
||||
32
dvc.yaml
Normal file
32
dvc.yaml
Normal file
@@ -0,0 +1,32 @@
|
||||
stages:
|
||||
prepare:
|
||||
cmd: python src/prepare.py data/data.xml
|
||||
deps:
|
||||
- data/data.xml
|
||||
- src/prepare.py
|
||||
params:
|
||||
- prepare.seed
|
||||
- prepare.split
|
||||
outs:
|
||||
- data/prepared
|
||||
featurize:
|
||||
cmd: python src/featurization.py data/prepared data/features
|
||||
deps:
|
||||
- data/prepared
|
||||
- src/featurization.py
|
||||
params:
|
||||
- featurize.max_features
|
||||
- featurize.ngrams
|
||||
outs:
|
||||
- data/features
|
||||
train:
|
||||
cmd: python src/train.py data/features model.pkl
|
||||
deps:
|
||||
- data/features
|
||||
- src/train.py
|
||||
params:
|
||||
- train.min_split
|
||||
- train.n_est
|
||||
- train.seed
|
||||
outs:
|
||||
- model.pkl
|
||||
Reference in New Issue
Block a user