Merge fee33b0d42 into 8609e5fae5

2023-12-22 14:41:13 +00:00 · 2023-12-22 14:41:13 +00:00 · c948f5f4ea
commit c948f5f4ea
parent 8609e5fae5 fee33b0d42
5 changed files with 369 additions and 0 deletions
--- a/mediapipe/modules/object_detection/object_detection.cfg
+++ b/mediapipe/modules/object_detection/object_detection.cfg
@ -0,0 +1,182 @@
+[net]
+# Testing
+batch=1
+subdivisions=1
+# Training
+# batch=64
+# subdivisions=2
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=1000
+max_batches = 500200
+policy=steps
+steps=400000,450000
+scales=.1,.1
+
+[convolutional]
+batch_normalize=1
+filters=16
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=1
+
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+###########
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=255
+activation=linear
+
+
+
+[yolo]
+mask = 3,4,5
+anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
+classes=80
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
+[route]
+layers = -4
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[upsample]
+stride=2
+
+[route]
+layers = -1, 8
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=255
+activation=linear
+
+[yolo]
+mask = 0,1,2
+anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
+classes=80
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
--- a/mediapipe/modules/object_detection/object_detection_labels.txt
+++ b/mediapipe/modules/object_detection/object_detection_labels.txt
@ -0,0 +1,80 @@
+person
+bicycle
+car
+motorbike
+aeroplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+sofa
+pottedplant
+bed
+diningtable
+toilet
+tvmonitor
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
--- a/mediapipe/modules/object_detection/object_detection_weights.weights
+++ b/mediapipe/modules/object_detection/object_detection_weights.weights
--- a/mediapipe/python/solutions/Bounding_boxes.py
+++ b/mediapipe/python/solutions/Bounding_boxes.py
@ -0,0 +1,55 @@
+import cv2
+import numpy as np
+
+def run(frame, net, classes):
+        
+    height, width, _ = frame.shape   #height and width of the frame captured
+        
+    blob = cv2.dnn.blobFromImage(frame, 1/255, (416, 416), (0, 0, 0), swapRB = True, crop = False)
+    net.setInput(blob)
+    
+    output_layers_names = net.getUnconnectedOutLayersNames()
+    layerOutputs = net.forward(output_layers_names)
+    
+    boxes = []       #stores the coordinates and measurements for the bounding box
+    confidences = [] #Stores the confidence, i.e how much the object atches with a given class
+    class_ids = []   #stores all the labels
+
+    for output in layerOutputs:   #get ouput layers information
+        for detection in output:  #extract information from each output (detection contains 85 parameters)
+            
+            scores = detection[5:] #prediction from all the classes, 6th element onwards
+            
+            class_id = np.argmax(scores) #extract location of the class with maximum confidence(index)
+            confidence = scores[class_id] #extract the vaue of the confidence
+            if confidence > 0.5:
+                #these are normalised co-ordinates that is why we multiply them with heigth and width to
+                #scale them back
+                center_x = int(detection[0]*width) #the center x co-ordinate of the bounding box
+                center_y = int(detection[1]*height) #the center y co-ordinate of the bounding box
+                w = int(detection[2]*width)         #width of the bounding box
+                h = int(detection[3]*height)        #height of the bounding box
+
+                x = int(center_x - w/2)             #corner x co-ordinate
+                y = int(center_y - h/2)             #corner y co-ordinate
+
+                boxes.append([x, y, w, h])          #saves the co-ordinates and measurement in boxes[]
+                confidences.append((float(confidence))) #saves the confidences of the classes
+                class_ids.append(class_id)              #index of the classes detected
+    
+    #performs non-Max Supression on the classes with confidence greater then the threshold
+    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.2) 
+
+
+    return indexes, boxes, class_ids, confidences
+
+def boxing(frame, indexes, boxes, class_ids, confidences, classes, font):
+    for i in indexes.flatten(): 
+            x, y, w, h =  boxes[i] #co-ordinates if bounding boxes of final object after NMS
+            label = str(classes[class_ids[i]]) #the name of the object detected
+            confidence = str(round(confidences[i], 2)) #saves the confidence rounding it to 2 decimals
+            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2) #bounda a rectangle around the object
+            #shows the confidence and object name at top left
+            cv2.putText(frame, label + " " + confidence, (x, y+20), font, 2, (255, 255, 255), 2)
+
+    return frame
--- a/mediapipe/python/solutions/ObjectDetection.py
+++ b/mediapipe/python/solutions/ObjectDetection.py
@ -0,0 +1,52 @@
+import cv2
+import os
+from Bounding_boxes import run
+from Bounding_boxes import boxing
+
+
+def ObjectDetection(video_path):
+
+    path = os.path.abspath(video_path)
+
+    if(video_path==0):
+        cap = cv2.VideoCapture(0)
+    
+    else:
+        cap = cv2.VideoCapture(path)
+
+    ret = True                                       #creates a boolean 
+    ret, old_frame = cap.read()                      #ret is true and the first frame of video saved in old_frame
+
+
+    net = cv2.dnn.readNet('modules/object_detection/object_detection_weights.weights', 'modules/object_detection/object_detection.cfg')
+        
+    classes = []
+
+    with open('modules/object_detection/object_detection_labels.txt', 'r') as f:
+        classes = f.read().splitlines() 
+    
+    if not cap.isOpened():
+        raise IOError("Cannot open webcam/Cannot read file")
+
+    while ret:
+        ret, frame = cap.read()          #saves the first frame of video in frame
+
+        indexes = []
+        boxes = []
+        class_ids = []
+        confidences = []
+        indexes, boxes, class_ids, confidences = run(frame, net, classes)
+        font = cv2.FONT_HERSHEY_PLAIN
+
+        if len(indexes) <= 0:    #if no bounding box
+            continue
+        elif len(indexes) > 0:  #if bounding box is presrnt
+
+            frame = boxing(frame, indexes, boxes, class_ids, confidences, classes, font)
+        cv2.imshow('Output', frame)
+        c = cv2.waitKey(1)           #new frame comes after () ms
+        if cv2.waitKey(1) & 0xFF == ord('q'): #press q on keyboard to stop the webcam
+            break
+
+    cap.release()
+    cv2.destroyAllWindows()          #Once out of the while loop, the pop-up window closes automatically