Merge fee33b0d42
into 8609e5fae5
This commit is contained in:
commit
c948f5f4ea
182
mediapipe/modules/object_detection/object_detection.cfg
Normal file
182
mediapipe/modules/object_detection/object_detection.cfg
Normal file
|
@ -0,0 +1,182 @@
|
||||||
|
[net]
|
||||||
|
# Testing
|
||||||
|
batch=1
|
||||||
|
subdivisions=1
|
||||||
|
# Training
|
||||||
|
# batch=64
|
||||||
|
# subdivisions=2
|
||||||
|
width=416
|
||||||
|
height=416
|
||||||
|
channels=3
|
||||||
|
momentum=0.9
|
||||||
|
decay=0.0005
|
||||||
|
angle=0
|
||||||
|
saturation = 1.5
|
||||||
|
exposure = 1.5
|
||||||
|
hue=.1
|
||||||
|
|
||||||
|
learning_rate=0.001
|
||||||
|
burn_in=1000
|
||||||
|
max_batches = 500200
|
||||||
|
policy=steps
|
||||||
|
steps=400000,450000
|
||||||
|
scales=.1,.1
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=16
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[maxpool]
|
||||||
|
size=2
|
||||||
|
stride=2
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=32
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[maxpool]
|
||||||
|
size=2
|
||||||
|
stride=2
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=64
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[maxpool]
|
||||||
|
size=2
|
||||||
|
stride=2
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[maxpool]
|
||||||
|
size=2
|
||||||
|
stride=2
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[maxpool]
|
||||||
|
size=2
|
||||||
|
stride=2
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[maxpool]
|
||||||
|
size=2
|
||||||
|
stride=1
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=1024
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
###########
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=255
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
[yolo]
|
||||||
|
mask = 3,4,5
|
||||||
|
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
|
||||||
|
classes=80
|
||||||
|
num=6
|
||||||
|
jitter=.3
|
||||||
|
ignore_thresh = .7
|
||||||
|
truth_thresh = 1
|
||||||
|
random=1
|
||||||
|
|
||||||
|
[route]
|
||||||
|
layers = -4
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[upsample]
|
||||||
|
stride=2
|
||||||
|
|
||||||
|
[route]
|
||||||
|
layers = -1, 8
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=255
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[yolo]
|
||||||
|
mask = 0,1,2
|
||||||
|
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
|
||||||
|
classes=80
|
||||||
|
num=6
|
||||||
|
jitter=.3
|
||||||
|
ignore_thresh = .7
|
||||||
|
truth_thresh = 1
|
||||||
|
random=1
|
|
@ -0,0 +1,80 @@
|
||||||
|
person
|
||||||
|
bicycle
|
||||||
|
car
|
||||||
|
motorbike
|
||||||
|
aeroplane
|
||||||
|
bus
|
||||||
|
train
|
||||||
|
truck
|
||||||
|
boat
|
||||||
|
traffic light
|
||||||
|
fire hydrant
|
||||||
|
stop sign
|
||||||
|
parking meter
|
||||||
|
bench
|
||||||
|
bird
|
||||||
|
cat
|
||||||
|
dog
|
||||||
|
horse
|
||||||
|
sheep
|
||||||
|
cow
|
||||||
|
elephant
|
||||||
|
bear
|
||||||
|
zebra
|
||||||
|
giraffe
|
||||||
|
backpack
|
||||||
|
umbrella
|
||||||
|
handbag
|
||||||
|
tie
|
||||||
|
suitcase
|
||||||
|
frisbee
|
||||||
|
skis
|
||||||
|
snowboard
|
||||||
|
sports ball
|
||||||
|
kite
|
||||||
|
baseball bat
|
||||||
|
baseball glove
|
||||||
|
skateboard
|
||||||
|
surfboard
|
||||||
|
tennis racket
|
||||||
|
bottle
|
||||||
|
wine glass
|
||||||
|
cup
|
||||||
|
fork
|
||||||
|
knife
|
||||||
|
spoon
|
||||||
|
bowl
|
||||||
|
banana
|
||||||
|
apple
|
||||||
|
sandwich
|
||||||
|
orange
|
||||||
|
broccoli
|
||||||
|
carrot
|
||||||
|
hot dog
|
||||||
|
pizza
|
||||||
|
donut
|
||||||
|
cake
|
||||||
|
chair
|
||||||
|
sofa
|
||||||
|
pottedplant
|
||||||
|
bed
|
||||||
|
diningtable
|
||||||
|
toilet
|
||||||
|
tvmonitor
|
||||||
|
laptop
|
||||||
|
mouse
|
||||||
|
remote
|
||||||
|
keyboard
|
||||||
|
cell phone
|
||||||
|
microwave
|
||||||
|
oven
|
||||||
|
toaster
|
||||||
|
sink
|
||||||
|
refrigerator
|
||||||
|
book
|
||||||
|
clock
|
||||||
|
vase
|
||||||
|
scissors
|
||||||
|
teddy bear
|
||||||
|
hair drier
|
||||||
|
toothbrush
|
Binary file not shown.
55
mediapipe/python/solutions/Bounding_boxes.py
Normal file
55
mediapipe/python/solutions/Bounding_boxes.py
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def run(frame, net, classes, *, conf_threshold=0.5, nms_threshold=0.2,
        input_size=(416, 416)):
    """Run the detection network on one frame and apply non-max suppression.

    Args:
        frame: Image array whose ``shape`` unpacks as
            ``(height, width, channels)``. ``None`` is accepted and treated
            as "no frame", so a failed ``cap.read()`` does not crash here.
        net: Network object providing ``setInput``,
            ``getUnconnectedOutLayersNames`` and ``forward``.
        classes: Not used by this function; kept so existing callers that
            pass it positionally keep working.
        conf_threshold: Minimum best-class score for a detection to be kept;
            also passed to ``cv2.dnn.NMSBoxes`` as its score threshold.
        nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.
        input_size: ``(width, height)`` the frame is resized to when the
            input blob is built.

    Returns:
        A tuple ``(indexes, boxes, class_ids, confidences)`` where ``boxes``
        holds ``[x, y, w, h]`` lists in frame pixels, ``confidences`` holds
        Python floats, ``class_ids`` holds the arg-max class index of each
        kept detection, and ``indexes`` is whatever ``cv2.dnn.NMSBoxes``
        returns for those boxes (an empty tuple when ``frame`` is ``None``).
    """
    if frame is None:
        # Nothing to detect on; mirror the "no boxes" result shape.
        return (), [], [], []

    height, width, _ = frame.shape  # size of the captured frame

    # Scale pixels by 1/255, resize to the network input size and swap the
    # R and B channels; no mean subtraction, no cropping.
    blob = cv2.dnn.blobFromImage(
        frame, 1/255, input_size, (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)

    output_layer_names = net.getUnconnectedOutLayersNames()
    layer_outputs = net.forward(output_layer_names)

    boxes = []        # [x, y, w, h] of every candidate box, in frame pixels
    confidences = []  # best-class score of every candidate box
    class_ids = []    # index of the best class of every candidate box

    for output in layer_outputs:
        # Each detection row is: 4 box values, 1 further value, then the
        # per-class scores (85 values per row according to the original
        # author's note).
        for detection in output:
            scores = detection[5:]  # class scores, 6th element onwards
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence <= conf_threshold:
                continue

            # The box values are treated as normalised coordinates, so they
            # are multiplied by the frame size to get pixels.
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Convert from centre-based to top-left-corner-based.
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(class_id)

    # Non-max suppression over the candidates that passed the threshold.
    indexes = cv2.dnn.NMSBoxes(
        boxes, confidences, conf_threshold, nms_threshold)

    return indexes, boxes, class_ids, confidences
|
||||||
|
|
||||||
|
def boxing(frame, indexes, boxes, class_ids, confidences, classes, font):
    """Draw the boxes selected by non-max suppression onto ``frame``.

    Args:
        frame: Image that ``cv2.rectangle`` / ``cv2.putText`` draw on.
        indexes: Positions into ``boxes`` of the detections to draw. May be
            an empty tuple, a list, or a flat or ``(N, 1)`` array.
        boxes: Sequence of ``[x, y, w, h]`` entries.
        class_ids: Class index of each entry in ``boxes``.
        confidences: Score of each entry in ``boxes``.
        classes: Sequence mapping a class index to its label.
        font: Font constant passed straight to ``cv2.putText``.

    Returns:
        The same ``frame`` object, after drawing.
    """
    # Normalise to a flat sequence first: calling ``.flatten()`` directly
    # fails when the NMS result is an empty tuple or a plain list.
    kept = np.asarray(indexes).reshape(-1)

    for idx in kept:
        idx = int(idx)
        x, y, w, h = boxes[idx]  # box that survived NMS
        label = str(classes[class_ids[idx]])  # name of the detected object
        confidence = str(round(confidences[idx], 2))  # score, 2 decimals

        # Green rectangle around the object.
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # White "<label> <confidence>" text anchored 20 px below the box's
        # top-left corner.
        cv2.putText(frame, label + " " + confidence, (x, y + 20), font, 2,
                    (255, 255, 255), 2)

    return frame
|
52
mediapipe/python/solutions/ObjectDetection.py
Normal file
52
mediapipe/python/solutions/ObjectDetection.py
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
import cv2
|
||||||
|
import os
|
||||||
|
from Bounding_boxes import run
|
||||||
|
from Bounding_boxes import boxing
|
||||||
|
|
||||||
|
|
||||||
|
def ObjectDetection(video_path):
    """Run object detection on a video source and show the annotated frames.

    Frames are read one by one, passed through ``run`` and, when anything was
    detected, annotated by ``boxing``. Every frame is shown in a window named
    'Output'. The loop ends when the source runs out of frames or the user
    presses 'q'.

    Args:
        video_path: An integer camera index (e.g. ``0`` for the default
            webcam) or a path to a video file.

    Raises:
        IOError: If the camera or file cannot be opened.
    """
    # An integer selects a camera. Only real paths go through
    # os.path.abspath(), which raises TypeError when given an int.
    if isinstance(video_path, int):
        source = video_path
    else:
        source = os.path.abspath(video_path)

    cap = cv2.VideoCapture(source)
    if not cap.isOpened():
        cap.release()
        raise IOError("Cannot open webcam/Cannot read file")

    try:
        # NOTE(review): these paths are relative to the current working
        # directory - confirm the intended launch directory.
        net = cv2.dnn.readNet('modules/object_detection/object_detection_weights.weights', 'modules/object_detection/object_detection.cfg')

        with open('modules/object_detection/object_detection_labels.txt', 'r') as f:
            classes = f.read().splitlines()

        font = cv2.FONT_HERSHEY_PLAIN

        while True:
            ret, frame = cap.read()
            if not ret or frame is None:
                # End of stream or read failure: stop instead of handing an
                # empty frame to the detector.
                break

            indexes, boxes, class_ids, confidences = run(frame, net, classes)

            if len(indexes) > 0:  # at least one bounding box survived NMS
                frame = boxing(frame, indexes, boxes, class_ids, confidences, classes, font)

            # Show every frame, with or without detections, so the window
            # keeps updating and key presses keep being processed.
            cv2.imshow('Output', frame)

            # Poll the keyboard once per frame; 'q' stops the loop.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the capture device and close the window, even when
        # loading the model or processing a frame raised.
        cap.release()
        cv2.destroyAllWindows()
|
Loading…
Reference in New Issue
Block a user