Merge fee33b0d42 into 8609e5fae5

2023-12-22 14:41:13 +00:00 · 2023-12-22 14:41:13 +00:00 · c948f5f4ea
commit c948f5f4ea
parent 8609e5fae5 fee33b0d42
5 changed files with 369 additions and 0 deletions
--- a/mediapipe/modules/object_detection/object_detection.cfg
+++ b/mediapipe/modules/object_detection/object_detection.cfg
@ -0,0 +1,182 @@
 [net]
 # Testing
 batch=1
 subdivisions=1
 # Training
 # batch=64
 # subdivisions=2
 width=416
 height=416
 channels=3
 momentum=0.9
 decay=0.0005
 angle=0
 saturation = 1.5
 exposure = 1.5
 hue=.1
 learning_rate=0.001
 burn_in=1000
 max_batches = 500200
 policy=steps
 steps=400000,450000
 scales=.1,.1
 [convolutional]
 batch_normalize=1
 filters=16
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=32
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=1
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=1
 pad=1
 activation=leaky
 ###########
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 size=1
 stride=1
 pad=1
 filters=255
 activation=linear
 [yolo]
 mask = 3,4,5
 anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
 classes=80
 num=6
 jitter=.3
 ignore_thresh = .7
 truth_thresh = 1
 random=1
 [route]
 layers = -4
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [upsample]
 stride=2
 [route]
 layers = -1, 8
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 size=1
 stride=1
 pad=1
 filters=255
 activation=linear
 [yolo]
 mask = 0,1,2
 anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
 classes=80
 num=6
 jitter=.3
 ignore_thresh = .7
 truth_thresh = 1
 random=1
--- a/mediapipe/modules/object_detection/object_detection_labels.txt
+++ b/mediapipe/modules/object_detection/object_detection_labels.txt
@ -0,0 +1,80 @@
 person
 bicycle
 car
 motorbike
 aeroplane
 bus
 train
 truck
 boat
 traffic light
 fire hydrant
 stop sign
 parking meter
 bench
 bird
 cat
 dog
 horse
 sheep
 cow
 elephant
 bear
 zebra
 giraffe
 backpack
 umbrella
 handbag
 tie
 suitcase
 frisbee
 skis
 snowboard
 sports ball
 kite
 baseball bat
 baseball glove
 skateboard
 surfboard
 tennis racket
 bottle
 wine glass
 cup
 fork
 knife
 spoon
 bowl
 banana
 apple
 sandwich
 orange
 broccoli
 carrot
 hot dog
 pizza
 donut
 cake
 chair
 sofa
 pottedplant
 bed
 diningtable
 toilet
 tvmonitor
 laptop
 mouse
 remote
 keyboard
 cell phone
 microwave
 oven
 toaster
 sink
 refrigerator
 book
 clock
 vase
 scissors
 teddy bear
 hair drier
 toothbrush
--- a/mediapipe/modules/object_detection/object_detection_weights.weights
+++ b/mediapipe/modules/object_detection/object_detection_weights.weights
--- a/mediapipe/python/solutions/Bounding_boxes.py
+++ b/mediapipe/python/solutions/Bounding_boxes.py
@ -0,0 +1,55 @@
 import cv2
 import numpy as np
 def run(frame, net, classes, conf_threshold=0.5, nms_threshold=0.2,
         input_size=(416, 416)):
     """Run the detection network on one frame and filter the results with NMS.

     Args:
         frame: Image array whose shape unpacks as (height, width, channels).
         net: Network object exposing ``setInput``,
             ``getUnconnectedOutLayersNames`` and ``forward``.
         classes: Not used by this function; kept so existing callers that
             pass the label list keep working.
         conf_threshold: Minimum best-class score for a detection to be kept.
             Also passed to NMS as its score threshold.
         nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.
         input_size: (width, height) the frame is resized to when the input
             blob is built.

     Returns:
         Tuple ``(indexes, boxes, class_ids, confidences)``. ``boxes`` holds
         ``[x, y, w, h]`` pixel boxes (top-left corner plus size),
         ``confidences`` the matching float scores, ``class_ids`` the matching
         class indexes, and ``indexes`` the positions in those lists that
         survived non-maximum suppression (empty array when none did).
     """
     height, width, _ = frame.shape
     blob = cv2.dnn.blobFromImage(frame, 1 / 255, input_size, (0, 0, 0),
                                  swapRB=True, crop=False)
     net.setInput(blob)
     layer_outputs = net.forward(net.getUnconnectedOutLayersNames())

     boxes = []        # [x, y, w, h] per kept detection
     confidences = []  # best-class score per kept detection
     class_ids = []    # best-class index per kept detection
     for output in layer_outputs:
         for detection in output:
             # Entries 0-3 are the box, entries 5+ are the per-class scores.
             # Entry 4 is skipped (presumably the objectness score - confirm
             # against the model's output layout).
             scores = detection[5:]
             class_id = np.argmax(scores)
             confidence = scores[class_id]
             if confidence <= conf_threshold:
                 continue
             # Box values are normalised, so scale them back to frame pixels.
             center_x = int(detection[0] * width)
             center_y = int(detection[1] * height)
             w = int(detection[2] * width)
             h = int(detection[3] * height)
             # Convert centre-based coordinates to the top-left corner.
             x = int(center_x - w / 2)
             y = int(center_y - h / 2)
             boxes.append([x, y, w, h])
             confidences.append(float(confidence))
             class_ids.append(class_id)

     # Non-maximum suppression over the detections that passed the threshold.
     indexes = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold,
                                nms_threshold)
     # Some OpenCV builds return an empty tuple when nothing survives; hand
     # callers an empty array instead so ``indexes.flatten()`` keeps working.
     if len(indexes) == 0:
         indexes = np.empty(0, dtype=int)
     return indexes, boxes, class_ids, confidences
 def boxing(frame, indexes, boxes, class_ids, confidences, classes, font):
     """Draw the selected bounding boxes and their labels onto ``frame``.

     Args:
         frame: Image to draw on; it is modified in place and also returned.
         indexes: Positions (into ``boxes``/``class_ids``/``confidences``) of
             the detections to draw. May be an array of any shape, a list or a
             tuple; ``None`` or an empty value draws nothing.
         boxes: Sequence of ``[x, y, w, h]`` boxes (top-left corner plus size).
         class_ids: Class index for each box.
         confidences: Score for each box.
         classes: Sequence mapping a class index to its label.
         font: OpenCV font identifier passed to ``cv2.putText``.

     Returns:
         The same ``frame`` object, with rectangles and labels drawn on it.
     """
     # An empty NMS result can arrive as a tuple or list, which has no
     # ``flatten``; treat every empty form as "nothing to draw".
     if indexes is None or len(indexes) == 0:
         return frame

     for i in np.asarray(indexes).flatten():
         x, y, w, h = boxes[i]
         label = str(classes[class_ids[i]])
         confidence = str(round(confidences[i], 2))
         cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
         # Label text is anchored 20 px below the box's top-left corner.
         cv2.putText(frame, label + " " + confidence, (x, y + 20), font, 2,
                     (255, 255, 255), 2)
     return frame
--- a/mediapipe/python/solutions/ObjectDetection.py
+++ b/mediapipe/python/solutions/ObjectDetection.py
@ -0,0 +1,52 @@
 import cv2
 import os
 from Bounding_boxes import run
 from Bounding_boxes import boxing
 def ObjectDetection(video_path):
     """Run object detection on a video source and display the annotated frames.

     Frames are read one at a time, passed through ``run`` to get detections,
     annotated with ``boxing`` when there are any, and shown in a window named
     'Output'. The loop ends when the source runs out of frames or the user
     presses 'q'.

     Args:
         video_path: Path to a video file, or an integer capture-device index
             (``0`` is what the original code used for the webcam).

     Raises:
         IOError: If the capture source cannot be opened.
     """
     # os.path.abspath() rejects integers, so only resolve real paths and hand
     # device indexes to VideoCapture untouched.
     if isinstance(video_path, int):
         cap = cv2.VideoCapture(video_path)
     else:
         cap = cv2.VideoCapture(os.path.abspath(video_path))

     # Fail before loading the model if the source is unusable.
     if not cap.isOpened():
         cap.release()
         raise IOError("Cannot open webcam/Cannot read file")

     try:
         # NOTE: model and label paths are relative to the current working
         # directory, as in the original code.
         net = cv2.dnn.readNet('modules/object_detection/object_detection_weights.weights', 'modules/object_detection/object_detection.cfg')
         with open('modules/object_detection/object_detection_labels.txt', 'r') as f:
             classes = f.read().splitlines()
         font = cv2.FONT_HERSHEY_PLAIN

         while True:
             ret, frame = cap.read()
             # A failed read means end of stream (frame is None); stop before
             # the frame is handed to the detector.
             if not ret:
                 break

             indexes, boxes, class_ids, confidences = run(frame, net, classes)
             if len(indexes) > 0:
                 frame = boxing(frame, indexes, boxes, class_ids, confidences, classes, font)

             # Always show the frame, even with no detections, so the window
             # keeps updating and the quit key keeps being polled.
             cv2.imshow('Output', frame)
             # Poll the keyboard once per frame; a second waitKey call could
             # consume the 'q' press before it is checked.
             if cv2.waitKey(1) & 0xFF == ord('q'):
                 break
     finally:
         # Release the capture and close the window even if detection raised.
         cap.release()
         cv2.destroyAllWindows()