Merge fee33b0d42
into 8609e5fae5
This commit is contained in:
commit
c948f5f4ea
182
mediapipe/modules/object_detection/object_detection.cfg
Normal file
182
mediapipe/modules/object_detection/object_detection.cfg
Normal file
|
@ -0,0 +1,182 @@
|
|||
[net]
|
||||
# Testing
|
||||
batch=1
|
||||
subdivisions=1
|
||||
# Training
|
||||
# batch=64
|
||||
# subdivisions=2
|
||||
width=416
|
||||
height=416
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
angle=0
|
||||
saturation = 1.5
|
||||
exposure = 1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.001
|
||||
burn_in=1000
|
||||
max_batches = 500200
|
||||
policy=steps
|
||||
steps=400000,450000
|
||||
scales=.1,.1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=16
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=32
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
###########
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
filters=255
|
||||
activation=linear
|
||||
|
||||
|
||||
|
||||
[yolo]
|
||||
mask = 3,4,5
|
||||
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
|
||||
classes=80
|
||||
num=6
|
||||
jitter=.3
|
||||
ignore_thresh = .7
|
||||
truth_thresh = 1
|
||||
random=1
|
||||
|
||||
[route]
|
||||
layers = -4
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[upsample]
|
||||
stride=2
|
||||
|
||||
[route]
|
||||
layers = -1, 8
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
filters=255
|
||||
activation=linear
|
||||
|
||||
[yolo]
|
||||
mask = 0,1,2
|
||||
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
|
||||
classes=80
|
||||
num=6
|
||||
jitter=.3
|
||||
ignore_thresh = .7
|
||||
truth_thresh = 1
|
||||
random=1
|
|
@ -0,0 +1,80 @@
|
|||
person
|
||||
bicycle
|
||||
car
|
||||
motorbike
|
||||
aeroplane
|
||||
bus
|
||||
train
|
||||
truck
|
||||
boat
|
||||
traffic light
|
||||
fire hydrant
|
||||
stop sign
|
||||
parking meter
|
||||
bench
|
||||
bird
|
||||
cat
|
||||
dog
|
||||
horse
|
||||
sheep
|
||||
cow
|
||||
elephant
|
||||
bear
|
||||
zebra
|
||||
giraffe
|
||||
backpack
|
||||
umbrella
|
||||
handbag
|
||||
tie
|
||||
suitcase
|
||||
frisbee
|
||||
skis
|
||||
snowboard
|
||||
sports ball
|
||||
kite
|
||||
baseball bat
|
||||
baseball glove
|
||||
skateboard
|
||||
surfboard
|
||||
tennis racket
|
||||
bottle
|
||||
wine glass
|
||||
cup
|
||||
fork
|
||||
knife
|
||||
spoon
|
||||
bowl
|
||||
banana
|
||||
apple
|
||||
sandwich
|
||||
orange
|
||||
broccoli
|
||||
carrot
|
||||
hot dog
|
||||
pizza
|
||||
donut
|
||||
cake
|
||||
chair
|
||||
sofa
|
||||
pottedplant
|
||||
bed
|
||||
diningtable
|
||||
toilet
|
||||
tvmonitor
|
||||
laptop
|
||||
mouse
|
||||
remote
|
||||
keyboard
|
||||
cell phone
|
||||
microwave
|
||||
oven
|
||||
toaster
|
||||
sink
|
||||
refrigerator
|
||||
book
|
||||
clock
|
||||
vase
|
||||
scissors
|
||||
teddy bear
|
||||
hair drier
|
||||
toothbrush
|
Binary file not shown.
55
mediapipe/python/solutions/Bounding_boxes.py
Normal file
55
mediapipe/python/solutions/Bounding_boxes.py
Normal file
|
@ -0,0 +1,55 @@
|
|||
import cv2
|
||||
import numpy as np
|
||||
|
||||
def run(frame, net, classes, conf_threshold=0.5, nms_threshold=0.2,
        input_size=(416, 416)):
    """Run the detection network on one frame and apply non-max suppression.

    Args:
        frame: Image array whose ``shape`` is ``(height, width, channels)``.
        net: Network object exposing ``setInput``,
            ``getUnconnectedOutLayersNames`` and ``forward`` (as returned by
            ``cv2.dnn.readNet``).
        classes: Class-name list. Not used here; kept so existing callers
            keep working.
        conf_threshold: Minimum class score for a detection to be kept. The
            same value is passed to NMS as its score threshold.
        nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.
        input_size: ``(width, height)`` the frame is resized to before it is
            fed to the network.

    Returns:
        Tuple ``(indexes, boxes, class_ids, confidences)``. ``boxes`` holds
        ``[x, y, w, h]`` pixel boxes (top-left corner plus size),
        ``confidences`` and ``class_ids`` are parallel to ``boxes``, and
        ``indexes`` is the result of ``cv2.dnn.NMSBoxes`` selecting the boxes
        that survive suppression.
    """
    height, width, _ = frame.shape

    # Scale pixels to [0, 1], resize to the network input and swap BGR -> RGB.
    blob = cv2.dnn.blobFromImage(
        frame, 1/255, input_size, (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(net.getUnconnectedOutLayersNames())

    boxes = []        # [x, y, w, h] per kept detection
    confidences = []  # best class score per kept detection
    class_ids = []    # index of the best class per kept detection

    for output in layer_outputs:
        # Each row is one detection: entries 0-3 are the normalised box
        # (centre x, centre y, width, height) and entries from index 5 on are
        # the per-class scores. Index 4 is not used here (presumably the
        # objectness score -- confirm against the model).
        class_scores = output[:, 5:]
        best_ids = np.argmax(class_scores, axis=1)
        best_scores = class_scores[np.arange(len(output)), best_ids]

        # Only visit rows that pass the confidence threshold instead of
        # looping over every candidate in Python.
        for row in np.flatnonzero(best_scores > conf_threshold):
            detection = output[row]

            # Coordinates are normalised, so scale them back to frame pixels.
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Convert the centre point to the top-left corner.
            x = int(center_x - w/2)
            y = int(center_y - h/2)

            boxes.append([x, y, w, h])
            confidences.append(float(best_scores[row]))
            class_ids.append(int(best_ids[row]))

    # Non-max suppression drops overlapping boxes for the same object.
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

    return indexes, boxes, class_ids, confidences
|
||||
|
||||
def boxing(frame, indexes, boxes, class_ids, confidences, classes, font):
    """Draw the selected bounding boxes and their labels onto ``frame``.

    Args:
        frame: Image to draw on; it is modified in place by the cv2 drawing
            calls and also returned.
        indexes: Indices into ``boxes`` of the detections to draw. May be a
            NumPy array of any shape or a plain (possibly empty) sequence.
        boxes: List of ``[x, y, w, h]`` pixel boxes.
        class_ids: Class index for each entry of ``boxes``.
        confidences: Confidence for each entry of ``boxes``.
        classes: Class-name list indexed by class id.
        font: OpenCV font constant used for the label text.

    Returns:
        The same ``frame`` with rectangles and labels drawn on it.
    """
    # ``indexes`` can be a plain empty tuple/list when nothing was detected,
    # and those have no ``.flatten``; normalise to a flat integer array first.
    for i in np.asarray(indexes, dtype=int).flatten():
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        confidence = str(round(confidences[i], 2))

        # Green rectangle around the object.
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        # Class name and confidence just inside the top-left corner.
        cv2.putText(frame, label + " " + confidence, (x, y+20), font, 2,
                    (255, 255, 255), 2)

    return frame
|
52
mediapipe/python/solutions/ObjectDetection.py
Normal file
52
mediapipe/python/solutions/ObjectDetection.py
Normal file
|
@ -0,0 +1,52 @@
|
|||
import cv2
|
||||
import os
|
||||
from Bounding_boxes import run
|
||||
from Bounding_boxes import boxing
|
||||
|
||||
|
||||
def ObjectDetection(video_path):
    """Detect objects in a video file or webcam stream and show the result.

    Frames are read one at a time, passed through ``run`` to obtain
    detections, annotated with ``boxing`` and displayed in a window named
    'Output' until the stream ends or the user presses 'q'.

    Args:
        video_path: Path to a video file, or an integer camera index
            (e.g. ``0`` for the default webcam).

    Raises:
        IOError: If the video source cannot be opened.
    """
    # A camera index must reach VideoCapture unchanged: os.path.abspath
    # raises TypeError when given an int.
    if isinstance(video_path, int):
        source = video_path
    else:
        source = os.path.abspath(video_path)

    cap = cv2.VideoCapture(source)
    if not cap.isOpened():
        cap.release()
        raise IOError("Cannot open webcam/Cannot read file")

    try:
        # Model and label paths are relative to the current working directory.
        net = cv2.dnn.readNet(
            'modules/object_detection/object_detection_weights.weights',
            'modules/object_detection/object_detection.cfg')

        with open('modules/object_detection/object_detection_labels.txt', 'r',
                  encoding='utf-8') as f:
            classes = f.read().splitlines()

        font = cv2.FONT_HERSHEY_PLAIN

        while True:
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: frame is not usable.
                break

            indexes, boxes, class_ids, confidences = run(frame, net, classes)

            if len(indexes) > 0:
                frame = boxing(frame, indexes, boxes, class_ids, confidences,
                               classes, font)

            # Show every frame, including those without detections, so the
            # window keeps updating and the quit key stays responsive.
            cv2.imshow('Output', frame)

            # Wait 1 ms for a key press; 'q' stops the loop.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the capture device and close the window, even on error.
        cap.release()
        cv2.destroyAllWindows()
|
Loading…
Reference in New Issue
Block a user