This commit is contained in:
Arnav Zutshi 2023-12-22 14:41:13 +00:00 committed by GitHub
commit c948f5f4ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 369 additions and 0 deletions

View File

@ -0,0 +1,182 @@
[net]
# Testing (inference) settings: one image per batch
batch=1
subdivisions=1
# Training settings: uncomment these and comment out the testing values above
# batch=64
# subdivisions=2
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=1
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
###########
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=80
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=80
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

View File

@ -0,0 +1,80 @@
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush

View File

@ -0,0 +1,55 @@
import cv2
import numpy as np
def run(frame, net, classes, conf_threshold=0.5, nms_threshold=0.2,
        input_size=(416, 416)):
    """Run the detector on one frame and apply non-maximum suppression.

    Args:
        frame: Image array whose first two dimensions are (height, width).
        net: Network object providing ``setInput``,
            ``getUnconnectedOutLayersNames`` and ``forward``.
        classes: Not used by this function; kept so existing callers that
            pass it keep working.
        conf_threshold: Minimum best-class score for a detection to be kept;
            also passed to ``cv2.dnn.NMSBoxes`` as its score threshold.
        nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.
        input_size: (width, height) the frame is resized to when the input
            blob is built.

    Returns:
        Tuple ``(indexes, boxes, class_ids, confidences)``:
        ``indexes`` is whatever ``cv2.dnn.NMSBoxes`` returns (positions into
        the other three lists), ``boxes`` is a list of ``[x, y, w, h]`` in
        frame pixels with (x, y) the top-left corner, ``class_ids`` is a list
        of int class indices and ``confidences`` a list of float scores.
    """
    height, width = frame.shape[:2]

    # Scale pixels by 1/255, resize to the network input size and swap the
    # R and B channels; crop=False resizes without cropping.
    blob = cv2.dnn.blobFromImage(frame, 1/255, input_size, (0, 0, 0),
                                 swapRB=True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(net.getUnconnectedOutLayersNames())

    boxes = []        # [x, y, w, h] per kept detection, in frame pixels
    confidences = []  # best-class score per kept detection
    class_ids = []    # index of the best class per kept detection

    for output in layer_outputs:
        # One row per candidate detection. Columns 0-3 are the normalised
        # centre-x, centre-y, width and height; columns 5+ are per-class
        # scores. Column 4 is not consulted (presumably the objectness
        # score -- TODO confirm whether it should gate detections too).
        output = np.asarray(output, dtype=np.float64)
        scores = output[:, 5:]
        best_class = np.argmax(scores, axis=1)
        best_score = scores[np.arange(scores.shape[0]), best_class]

        keep = best_score > conf_threshold
        if not keep.any():
            continue
        kept = output[keep]

        # Scale the normalised box back to frame pixels, truncating to int
        # at each step.
        center_x = (kept[:, 0] * width).astype(int)
        center_y = (kept[:, 1] * height).astype(int)
        box_w = (kept[:, 2] * width).astype(int)
        box_h = (kept[:, 3] * height).astype(int)
        corner_x = (center_x - box_w / 2).astype(int)
        corner_y = (center_y - box_h / 2).astype(int)

        boxes.extend(
            np.stack([corner_x, corner_y, box_w, box_h], axis=1).tolist())
        confidences.extend(best_score[keep].tolist())
        class_ids.extend(best_class[keep].tolist())

    # Non-maximum suppression drops overlapping boxes for the same object.
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold,
                               nms_threshold)
    return indexes, boxes, class_ids, confidences
def boxing(frame, indexes, boxes, class_ids, confidences, classes, font):
    """Draw the boxes selected by NMS, with their labels, onto ``frame``.

    Args:
        frame: Image to draw on; it is modified in place and also returned.
        indexes: Positions into ``boxes`` / ``class_ids`` / ``confidences``
            of the detections to draw. May be a NumPy array of any shape,
            a list, or an empty tuple.
        boxes: List of ``[x, y, w, h]`` boxes, (x, y) being the top-left
            corner.
        class_ids: Class index of each detection.
        confidences: Confidence score of each detection.
        classes: Sequence mapping a class index to its name.
        font: OpenCV font passed to ``cv2.putText``.

    Returns:
        The same ``frame`` object with rectangles and labels drawn on it.
    """
    # cv2.dnn.NMSBoxes may hand back an empty tuple or a nested/flat array
    # depending on the result, so normalise to a flat integer array instead
    # of relying on the argument having a .flatten() method.
    selected = np.asarray(indexes, dtype=int).reshape(-1)

    for i in selected:
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        confidence = str(round(confidences[i], 2))  # two decimals for display
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # Text baseline sits 20 px below the box's top-left corner.
        cv2.putText(frame, label + " " + confidence, (x, y + 20), font, 2,
                    (255, 255, 255), 2)
    return frame

View File

@ -0,0 +1,52 @@
import cv2
import os
from Bounding_boxes import run
from Bounding_boxes import boxing
def ObjectDetection(video_path):
    """Run object detection on a video source and show the annotated frames.

    Frames are read from ``video_path`` and passed through ``run`` and
    ``boxing``, then displayed in a window titled 'Output' until the stream
    ends or the user presses 'q'.

    Args:
        video_path: Path to a video file, or an integer camera index
            (``0`` for the default webcam).

    Raises:
        IOError: If the capture source cannot be opened.
    """
    # os.path.abspath() only accepts path-like values, so a camera index
    # must be passed to VideoCapture untouched.
    if isinstance(video_path, int):
        source = video_path
    else:
        source = os.path.abspath(video_path)

    cap = cv2.VideoCapture(source)
    try:
        # Fail fast, before loading the model or reading any frame.
        if not cap.isOpened():
            raise IOError("Cannot open webcam/Cannot read file")

        net = cv2.dnn.readNet(
            'modules/object_detection/object_detection_weights.weights',
            'modules/object_detection/object_detection.cfg')
        with open('modules/object_detection/object_detection_labels.txt', 'r') as f:
            classes = f.read().splitlines()
        font = cv2.FONT_HERSHEY_PLAIN

        while True:
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: there is no frame to process.
                break

            indexes, boxes, class_ids, confidences = run(frame, net, classes)
            if len(indexes) > 0:
                frame = boxing(frame, indexes, boxes, class_ids, confidences,
                               classes, font)

            # Show every frame, with or without detections, so the window
            # keeps updating and the key poll below always runs.
            cv2.imshow('Output', frame)
            # Poll the keyboard once per frame; a second waitKey() call
            # could consume the 'q' press before it is checked.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if an error occurs.
        cap.release()
        cv2.destroyAllWindows()