diff --git a/mediapipe/modules/object_detection/object_detection.cfg b/mediapipe/modules/object_detection/object_detection.cfg
new file mode 100644
index 000000000..f46a4f129
--- /dev/null
+++ b/mediapipe/modules/object_detection/object_detection.cfg
@@ -0,0 +1,182 @@
+[net]
+# Testing
+batch=1
+subdivisions=1
+# Training
+# batch=64
+# subdivisions=2
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=1000
+max_batches = 500200
+policy=steps
+steps=400000,450000
+scales=.1,.1
+
+[convolutional]
+batch_normalize=1
+filters=16
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=1
+
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+###########
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=255
+activation=linear
+
+
+
+[yolo]
+mask = 3,4,5
+anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
+classes=80
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
+[route]
+layers = -4
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[upsample]
+stride=2
+
+[route]
+layers = -1, 8
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=255
+activation=linear
+
+[yolo]
+mask = 0,1,2
+anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
+classes=80
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
\ No newline at end of file
diff --git a/mediapipe/modules/object_detection/object_detection_labels.txt b/mediapipe/modules/object_detection/object_detection_labels.txt
new file mode 100644
index 000000000..16315f2be
--- /dev/null
+++ b/mediapipe/modules/object_detection/object_detection_labels.txt
@@ -0,0 +1,80 @@
+person
+bicycle
+car
+motorbike
+aeroplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+sofa
+pottedplant
+bed
+diningtable
+toilet
+tvmonitor
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
\ No newline at end of file
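The configuration above follows the standard tiny-YOLOv3 layout (416x416 input, two [yolo] heads, and the 80 COCO classes listed in the labels file). As a quick, hedged sanity check that the three new assets load together (not part of the PR; it assumes the interpreter is started from the mediapipe/ directory so the relative paths resolve), OpenCV's DNN module can read them like this:

    import cv2

    # Assumed paths, relative to the mediapipe/ directory.
    cfg = 'modules/object_detection/object_detection.cfg'
    weights = 'modules/object_detection/object_detection_weights.weights'
    labels = 'modules/object_detection/object_detection_labels.txt'

    net = cv2.dnn.readNet(weights, cfg)            # Darknet weights + cfg
    with open(labels) as f:
        classes = f.read().splitlines()

    print(len(classes))                            # expect 80 class names
    print(net.getUnconnectedOutLayersNames())      # the two [yolo] output layers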
diff --git a/mediapipe/modules/object_detection/object_detection_weights.weights b/mediapipe/modules/object_detection/object_detection_weights.weights
new file mode 100644
index 000000000..aad7e6c80
Binary files /dev/null and b/mediapipe/modules/object_detection/object_detection_weights.weights differ
diff --git a/mediapipe/python/solutions/Bounding_boxes.py b/mediapipe/python/solutions/Bounding_boxes.py
new file mode 100644
index 000000000..fdc0c6b99
--- /dev/null
+++ b/mediapipe/python/solutions/Bounding_boxes.py
@@ -0,0 +1,55 @@
+import cv2
+import numpy as np
+
+def run(frame, net, classes):
+
+    height, width, _ = frame.shape   #height and width of the captured frame
+
+    blob = cv2.dnn.blobFromImage(frame, 1/255, (416, 416), (0, 0, 0), swapRB = True, crop = False)
+    net.setInput(blob)
+
+    output_layers_names = net.getUnconnectedOutLayersNames()
+    layerOutputs = net.forward(output_layers_names)
+
+    boxes = []         #stores the coordinates and measurements of each bounding box
+    confidences = []   #stores the confidence, i.e. how well the object matches a given class
+    class_ids = []     #stores the indices of the predicted labels
+
+    for output in layerOutputs:    #iterate over the output layers
+        for detection in output:   #extract information from each detection (85 values per detection)
+
+            scores = detection[5:]   #class predictions, 6th element onwards
+
+            class_id = np.argmax(scores)    #index of the class with maximum confidence
+            confidence = scores[class_id]   #value of that confidence
+            if confidence > 0.5:
+                #these are normalised co-ordinates, so we multiply them by the frame
+                #width and height to scale them back
+                center_x = int(detection[0]*width)    #center x co-ordinate of the bounding box
+                center_y = int(detection[1]*height)   #center y co-ordinate of the bounding box
+                w = int(detection[2]*width)           #width of the bounding box
+                h = int(detection[3]*height)          #height of the bounding box
+
+                x = int(center_x - w/2)   #top-left corner x co-ordinate
+                y = int(center_y - h/2)   #top-left corner y co-ordinate
+
+                boxes.append([x, y, w, h])              #saves the co-ordinates and measurements
+                confidences.append(float(confidence))   #saves the confidence of the detection
+                class_ids.append(class_id)              #saves the index of the detected class
+
+    #performs non-max suppression on the boxes with confidence greater than the threshold
+    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.2)
+
+
+    return indexes, boxes, class_ids, confidences
+
+def boxing(frame, indexes, boxes, class_ids, confidences, classes, font):
+    for i in indexes.flatten():
+        x, y, w, h = boxes[i]                        #co-ordinates of a box that survived NMS
+        label = str(classes[class_ids[i]])           #the name of the detected object
+        confidence = str(round(confidences[i], 2))   #the confidence rounded to 2 decimals
+        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)   #draws a rectangle around the object
+        #shows the object name and confidence at the top-left of the box
+        cv2.putText(frame, label + " " + confidence, (x, y+20), font, 2, (255, 255, 255), 2)
+
+    return frame
\ No newline at end of file
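run() returns the indices kept by non-max suppression together with the raw box, class-id, and confidence lists, and boxing() draws the surviving boxes. A hedged single-image sketch of the two functions (not part of the PR; the image name is a placeholder, the model paths assume the mediapipe/ working directory, and mediapipe/python/solutions is assumed to be importable):

    import cv2
    from Bounding_boxes import run, boxing

    net = cv2.dnn.readNet('modules/object_detection/object_detection_weights.weights',
                          'modules/object_detection/object_detection.cfg')
    with open('modules/object_detection/object_detection_labels.txt') as f:
        classes = f.read().splitlines()

    frame = cv2.imread('sample.jpg')                  # placeholder file name; any BGR image works
    indexes, boxes, class_ids, confidences = run(frame, net, classes)
    if len(indexes) > 0:                              # draw only if something survived NMS
        frame = boxing(frame, indexes, boxes, class_ids, confidences,
                       classes, cv2.FONT_HERSHEY_PLAIN)
    cv2.imwrite('sample_out.jpg', frame)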
diff --git a/mediapipe/python/solutions/ObjectDetection.py b/mediapipe/python/solutions/ObjectDetection.py
new file mode 100644
index 000000000..4db3e3728
--- /dev/null
+++ b/mediapipe/python/solutions/ObjectDetection.py
@@ -0,0 +1,46 @@
+import cv2
+import os
+from Bounding_boxes import run
+from Bounding_boxes import boxing
+
+
+def ObjectDetection(video_path):
+
+    if video_path == 0:
+        cap = cv2.VideoCapture(0)              #0 opens the default webcam
+
+    else:
+        path = os.path.abspath(video_path)     #absolute path of the video file
+        cap = cv2.VideoCapture(path)
+
+    if not cap.isOpened():
+        raise IOError("Cannot open webcam/Cannot read file")
+
+    #model files are resolved relative to the current working directory
+    net = cv2.dnn.readNet('modules/object_detection/object_detection_weights.weights', 'modules/object_detection/object_detection.cfg')
+
+    classes = []
+
+    with open('modules/object_detection/object_detection_labels.txt', 'r') as f:
+        classes = f.read().splitlines()
+
+    font = cv2.FONT_HERSHEY_PLAIN
+
+    while True:
+        ret, frame = cap.read()    #reads the next frame of the video
+        if not ret:                #end of the video or a read error
+            break
+
+        indexes, boxes, class_ids, confidences = run(frame, net, classes)
+
+        if len(indexes) > 0:       #draw boxes only if a detection survived NMS
+            frame = boxing(frame, indexes, boxes, class_ids, confidences, classes, font)
+
+        cv2.imshow('Output', frame)
+
+        c = cv2.waitKey(1)            #a new frame is shown every 1 ms
+        if c & 0xFF == ord('q'):      #press q on the keyboard to stop
+            break
+
+    cap.release()
+    cv2.destroyAllWindows()    #once out of the while loop, the pop-up window closes automatically
\ No newline at end of file
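ObjectDetection() runs the whole capture/detect/draw loop end to end. A hedged usage sketch (not part of the PR; the sys.path line is an assumption so the sibling imports work while the working directory stays at mediapipe/, where the relative model paths resolve):

    import sys
    sys.path.append('python/solutions')      # assumption: current directory is mediapipe/

    from ObjectDetection import ObjectDetection

    ObjectDetection(0)                       # 0 -> default webcam; press q to quit
    # ObjectDetection('path/to/video.mp4')   # or pass a video file path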