This repository has been archived by the owner on Oct 31, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathseparate-images.py
executable file
·98 lines (79 loc) · 3.21 KB
/
separate-images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
import os, sys
from random import shuffle, randint
# Names of the output directories created under the root directory,
# one per dataset split (testing, validation, training).
TESTDIR = "testing"
VALIDIR = "validation"
TRAINDIR = "training"
def get_image_filename(image):
    """Return the last two path components of image with a ".jpg" extension.

    The extension of image is dropped, everything up to and including the
    second-to-last os.sep is cut off, and ".jpg" is appended. A path with
    fewer than two separators keeps its whole extension-less text.
    """
    stem = os.path.splitext(image)[0]
    # splitting from the right at most twice isolates "<parent>", "<name>"
    pieces = stem.rsplit(os.sep, 2)
    if len(pieces) == 3:
        stem = os.sep.join(pieces[1:])
    return stem + ".jpg"
def make_dirs(rootdir):
    """Create the testing, validation and training directories in rootdir.

    Directories that already exist are left untouched; a message is printed
    for each directory that is actually created.
    """
    for group in (TESTDIR, VALIDIR, TRAINDIR):
        target = os.path.join(rootdir, group)
        if os.path.exists(target):
            continue
        os.makedirs(target)
        print("Created directory %s" % target)
def make_out_dirs(rootdir, subdir):
    """Create subdir inside each of the three output directories of rootdir.

    Existing directories are skipped; a message is printed for each
    directory that is actually created.
    """
    targets = [
        os.path.join(rootdir, group, subdir)
        for group in (TESTDIR, VALIDIR, TRAINDIR)
    ]
    for target in targets:
        if not os.path.exists(target):
            os.makedirs(target)
            print("Created directory %s" % target)
def make_paths(rootdir, subdir, outdir, image):
    """Return the (source, destination) paths for moving one image.

    The source is rootdir/subdir/image; the destination is the same image
    placed under rootdir/outdir/subdir.
    """
    return (
        os.path.join(rootdir, subdir, image),
        os.path.join(rootdir, outdir, subdir, image),
    )
def _pick_outdir():
    """Randomly choose the output directory name for one image.

    A single draw from 1..25 is cut into fixed bands so that each share is a
    fraction of the whole set: 5/25 (20%) testing, 4/25 (16%) validation and
    16/25 (64%) training. Chaining two independent draws would make the
    validation share conditional on the first draw and shrink it to 12.8%.
    """
    draw = randint(1, 25)
    if draw <= 5:
        return TESTDIR
    if draw <= 9:
        return VALIDIR
    return TRAINDIR


def _separate_subdir(rootdir, subdir):
    """Move every file in rootdir/subdir into a randomly chosen output group.

    Nested directories are skipped. The emptied sub-directory is removed
    afterwards; if anything was skipped it is left in place instead.
    """
    imagedir = os.path.join(rootdir, subdir)
    make_out_dirs(rootdir, subdir)

    # randomize the processing order of the directory entries
    images = os.listdir(imagedir)
    shuffle(images)
    num_images = len(images)

    for position, image in enumerate(images, start=1):
        print("Processing item %d of %d -> " % (position, num_images), end='')
        # test the real entry name: a rewritten name (e.g. with ".jpg"
        # appended) would never match an existing directory
        if os.path.isdir(os.path.join(imagedir, image)):
            print("skipping directory")
            continue
        outdir = _pick_outdir()
        # the file is moved under its actual name, whatever its extension
        src, dest = make_paths(rootdir, subdir, outdir, image)
        print(outdir)
        os.rename(src, dest)
    print("\n")  # line break to make output easier to read

    # os.rmdir only works on empty directories; keep skipped entries intact
    if os.listdir(imagedir):
        print("Leaving %s in place (not empty)" % imagedir)
    else:
        os.rmdir(imagedir)


def main():
    """Separate the images below the root directory given on the command line.

    Every class sub-directory of the root is split into the testing,
    validation and training directories. Returns the process exit status:
    0 on success, 2 when the command line is unusable.
    """
    if len(sys.argv) != 2:
        print("usage: %s ROOTDIR" % sys.argv[0], file=sys.stderr)
        return 2
    rootdir = sys.argv[1]
    if not os.path.isdir(rootdir):
        print("%s is not a directory" % rootdir, file=sys.stderr)
        return 2

    # collect the class sub-directories; plain files and the output
    # directories of an earlier run are not image classes
    outdirs = (TESTDIR, VALIDIR, TRAINDIR)
    subdirs = [
        entry for entry in os.listdir(rootdir)
        if entry not in outdirs
        and os.path.isdir(os.path.join(rootdir, entry))
    ]

    make_dirs(rootdir)
    for subdir in subdirs:
        _separate_subdir(rootdir, subdir)
    return 0


if __name__ == "__main__":
    # usage: ./separate-images.py flowers-scaled/
    sys.exit(main())