youjunjer · November 3, 2019 17:57 · ChelseoMiguel · Nov 24, 2022 · ousaghro · Jan 31, 2023
diff --git a/ESP32-CAM python yolo b/ESP32-CAM python yolo
 # This code is written at BigVision LLC. It is based on the OpenCV project. It is subject to the license terms in the LICENSE file found in this distribution and at http://opencv.org/license.html

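 # Usage example:  python3 object_detection_yolo.py --video=run.mp4
 #                 python3 object_detection_yolo.py --image=bird.jpg
 # Note: --image and --video are parsed but never read by this script; frames
 # always come from the HTTP camera stream URL set further down.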
 import cv2 as cv
 import argparse
 import sys
 import numpy as np
 from urllib.request import urlopen
 import os
 import datetime
 import time

 # Detection parameters.
 confThreshold = 0.5  # Minimum class score for a detection to be kept
 nmsThreshold = 0.4   # Overlap threshold passed to non-maximum suppression
 inpWidth = 320       # Width of the network's input image; 320x320 is faster
 inpHeight = 320      # Height of the network's input image; 608x608 is more accurate

 # --image / --video are accepted on the command line, but nothing in this
 # script reads them: frames come from the HTTP camera stream.
 parser = argparse.ArgumentParser(description='Object Detection using YOLO in OPENCV')
 parser.add_argument('--image', help='Path to image file.')
 parser.add_argument('--video', help='Path to video file.')
 args = parser.parse_args()

 # Load names of classes (one name per line).
 # os.path.join yields "YOLO\\coco.names" on Windows exactly as before, and a
 # valid "YOLO/coco.names" elsewhere, where the hard-coded backslash form
 # would not point into the YOLO directory.
 classesFile = os.path.join("YOLO", "coco.names")
 classes = None
 with open(classesFile, 'rt') as f:
     classes = f.read().rstrip('\n').split('\n')

 # Give the configuration and weight files for the model and load the network using them.
 modelConfiguration = os.path.join("YOLO", "yolov3.cfg")
 modelWeights = os.path.join("YOLO", "yolov3.weights")

 net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
 net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
 # Runs on the CPU. Original author's note: cv.dnn.DNN_TARGET_OPENCL can be
 # used to switch to GPU; it only supports Intel GPUs and falls back to the
 # CPU automatically when none is available.
 net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

 # Get the names of the output layers
 def getOutputsNames(net):
    """Return the names of the network's output layers.

    Args:
        net: object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (here, the loaded cv.dnn network).

    Returns:
        list: the layer names selected by the unconnected-output layer ids.
    """
    # Get the names of all the layers in the network
    layersNames = net.getLayerNames()
    # The ids are 1-based. They may arrive either as an Nx1 array or as a
    # flat array of ints, so flatten first instead of assuming that every
    # entry can itself be indexed with [0].
    outLayerIds = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layersNames[int(i) - 1] for i in outLayerIds]

 # Draw the predicted bounding box
 def drawPred(classId, conf, left, top, right, bottom):
    """Draw one detection onto the module-level ``frame``.

    A box is drawn from (left, top) to (right, bottom), followed by a filled
    white caption background and the caption text. The caption is the
    confidence formatted to two decimals, prefixed with the class name when
    ``classes`` is non-empty.
    """
    font = cv.FONT_HERSHEY_SIMPLEX

    # Bounding box around the detected object.
    cv.rectangle(frame, (left, top), (right, bottom), (255, 178, 50), 3)

    caption = '%.2f' % conf
    if classes:
        assert(classId < len(classes))
        caption = '%s:%s' % (classes[classId], caption)

    # The text is measured at scale 0.5 but rendered at 0.75, so the
    # background rectangle is stretched by 1.5 in both directions.
    (textWidth, textHeight), baseLine = cv.getTextSize(caption, font, 0.5, 1)
    # Keep the caption inside the image when the box touches the top edge.
    anchorY = max(top, textHeight)
    backgroundTopLeft = (left, anchorY - round(1.5*textHeight))
    backgroundBottomRight = (left + round(1.5*textWidth), anchorY + baseLine)
    cv.rectangle(frame, backgroundTopLeft, backgroundBottomRight, (255, 255, 255), cv.FILLED)
    cv.putText(frame, caption, (left, anchorY), font, 0.75, (0,0,0), 1)

 # Remove the bounding boxes with low confidence using non-maxima suppression
 def postprocess(frame, outs):
    """Filter the raw network outputs and draw the surviving detections.

    Each detection row is read as: entries 0-3 are the box centre x, centre y,
    width and height as fractions of the frame size, and entries 5 onward are
    per-class scores. Rows whose best class score exceeds ``confThreshold``
    are kept, overlapping boxes are removed with non-maximum suppression, and
    the remaining boxes are drawn through ``drawPred``.

    Args:
        frame: image array; only ``frame.shape[0]`` (height) and
            ``frame.shape[1]`` (width) are read here.
        outs: iterable of network outputs, each an iterable of detection rows.
    """
    frameHeight, frameWidth = frame.shape[:2]

    classIds = []
    confidences = []
    boxes = []
    # Scan through all the bounding boxes output from the network and keep only the
    # ones with high confidence scores. Assign the box's class label as the class with the highest score.
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence <= confThreshold:
                continue
            center_x = int(detection[0] * frameWidth)
            center_y = int(detection[1] * frameHeight)
            width = int(detection[2] * frameWidth)
            height = int(detection[3] * frameHeight)
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)
            classIds.append(classId)
            confidences.append(float(confidence))
            boxes.append([left, top, width, height])

    # Perform non maximum suppression to eliminate redundant overlapping boxes with
    # lower confidences.
    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    # The kept indices may come back as an Nx1 array, a flat array, or an
    # empty tuple; flattening handles all three without indexing each entry.
    for i in np.asarray(indices).flatten():
        i = int(i)
        left, top, width, height = boxes[i]
        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)

 # Process inputs
 winName = 'Deep learning object detection in OpenCV'
 cv.namedWindow(winName, cv.WINDOW_NORMAL)

 outputFile = "yolo_out_py.avi"
 # Camera input: an HTTP stream that is scanned for JPEG frames.
 url="http://192.168.1.149:9601/stream"
 CAMERA_BUFFRER_SIZE=4096  # bytes requested per read (name kept as-is for compatibility)
 stream=urlopen(url)
 bts=b''

 # NOTE: no video writer is created; outputFile is currently unused.

 try:
     # Runs until any key is pressed in the OpenCV window.
     while cv.waitKey(1) < 0:
         chunk = stream.read(CAMERA_BUFFRER_SIZE)
         if not chunk:
             # The stream ended; without this check the loop would spin
             # forever on empty reads.
             break
         bts += chunk

         jpghead = bts.find(b'\xff\xd8')
         if jpghead < 0:
             # No start marker yet. Keep only the last byte (it may be the
             # first half of a marker split across reads) so the buffer
             # cannot grow without bound.
             bts = bts[-1:]
             continue
         # Search for the end marker only after the start marker; an end
         # marker left over from a partial frame would otherwise produce an
         # empty slice.
         jpgend = bts.find(b'\xff\xd9', jpghead + 2)
         if jpgend < 0:
             # Frame still incomplete: drop the bytes preceding its start.
             bts = bts[jpghead:]
             continue

         jpg = bts[jpghead:jpgend+2]
         bts = bts[jpgend+2:]
         img = cv.imdecode(np.frombuffer(jpg, dtype=np.uint8), cv.IMREAD_UNCHANGED)
         if img is None:
             # Corrupt or truncated JPEG data; skip it and wait for the next frame.
             continue

         # Flip code -1 flips around both axes; the other flips computed by
         # the original code were never used.
         frame = cv.flip(img, -1)
         frame = cv.resize(frame, (1024, 768))
         blob = cv.dnn.blobFromImage(frame, 1/255, (inpWidth, inpHeight), [0,0,0], 1, crop=False)
         net.setInput(blob)
         # Runs the forward pass to get output of the output layers
         outs = net.forward(getOutputsNames(net))
         # Remove the bounding boxes with low confidence
         postprocess(frame, outs)
         # Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
         t, _ = net.getPerfProfile()
         label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
         cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
         cv.imshow(winName, frame)
 finally:
     # Release the HTTP connection and the display window on any exit path.
     stream.close()
     cv.destroyAllWindows()
	# This code is written at BigVision LLC. It is based on the OpenCV project. It is subject to the license terms in the LICENSE file found in this distribution and at http://opencv.org/license.html

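	# Usage example: python3 object_detection_yolo.py --video=run.mp4
	# python3 object_detection_yolo.py --image=bird.jpg
	# Note: --image and --video are parsed but never read by this script; frames
	# always come from the HTTP camera stream URL set further down.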
	import cv2 as cv
	import argparse
	import sys
	import numpy as np
	from urllib.request import urlopen
	import os
	import datetime
	import time

	# Detection parameters.
	confThreshold = 0.5  # Minimum class score for a detection to be kept
	nmsThreshold = 0.4   # Overlap threshold passed to non-maximum suppression
	inpWidth = 320       # Width of the network's input image; 320x320 is faster
	inpHeight = 320      # Height of the network's input image; 608x608 is more accurate

	# --image / --video are accepted on the command line, but nothing in this
	# script reads them: frames come from the HTTP camera stream.
	parser = argparse.ArgumentParser(description='Object Detection using YOLO in OPENCV')
	parser.add_argument('--image', help='Path to image file.')
	parser.add_argument('--video', help='Path to video file.')
	args = parser.parse_args()

	# Load names of classes (one name per line).
	# os.path.join yields "YOLO\\coco.names" on Windows exactly as before, and a
	# valid "YOLO/coco.names" elsewhere, where the hard-coded backslash form
	# would not point into the YOLO directory.
	classesFile = os.path.join("YOLO", "coco.names")
	classes = None
	with open(classesFile, 'rt') as f:
	    classes = f.read().rstrip('\n').split('\n')

	# Give the configuration and weight files for the model and load the network using them.
	modelConfiguration = os.path.join("YOLO", "yolov3.cfg")
	modelWeights = os.path.join("YOLO", "yolov3.weights")

	net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
	net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
	# Runs on the CPU. Original author's note: cv.dnn.DNN_TARGET_OPENCL can be
	# used to switch to GPU; it only supports Intel GPUs and falls back to the
	# CPU automatically when none is available.
	net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

	# Get the names of the output layers
	def getOutputsNames(net):
	    """Return the names of the network's output layers.

	    Args:
	        net: object exposing ``getLayerNames()`` and
	            ``getUnconnectedOutLayers()`` (here, the loaded cv.dnn network).

	    Returns:
	        list: the layer names selected by the unconnected-output layer ids.
	    """
	    # Get the names of all the layers in the network
	    layersNames = net.getLayerNames()
	    # The ids are 1-based. They may arrive either as an Nx1 array or as a
	    # flat array of ints, so flatten first instead of assuming that every
	    # entry can itself be indexed with [0].
	    outLayerIds = np.asarray(net.getUnconnectedOutLayers()).flatten()
	    return [layersNames[int(i) - 1] for i in outLayerIds]

	# Draw the predicted bounding box
	def drawPred(classId, conf, left, top, right, bottom):
	    """Draw one detection onto the module-level ``frame``.

	    A box is drawn from (left, top) to (right, bottom), followed by a filled
	    white caption background and the caption text. The caption is the
	    confidence formatted to two decimals, prefixed with the class name when
	    ``classes`` is non-empty.
	    """
	    # Draw a bounding box.
	    cv.rectangle(frame, (left, top), (right, bottom), (255, 178, 50), 3)

	    label = '%.2f' % conf

	    # Get the label for the class name and its confidence
	    if classes:
	        assert(classId < len(classes))
	        label = '%s:%s' % (classes[classId], label)

	    # Display the label at the top of the bounding box.
	    # The text is measured at scale 0.5 but rendered at 0.75, so the
	    # background rectangle is stretched by 1.5 (the multiplication signs
	    # had been lost from this line, leaving a syntax error).
	    labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
	    top = max(top, labelSize[1])
	    cv.rectangle(frame, (left, top - round(1.5*labelSize[1])), (left + round(1.5*labelSize[0]), top + baseLine), (255, 255, 255), cv.FILLED)
	    cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0), 1)

	# Remove the bounding boxes with low confidence using non-maxima suppression
	def postprocess(frame, outs):
	    """Filter the raw network outputs and draw the surviving detections.

	    Each detection row is read as: entries 0-3 are the box centre x, centre y,
	    width and height as fractions of the frame size, and entries 5 onward are
	    per-class scores. Rows whose best class score exceeds ``confThreshold``
	    are kept, overlapping boxes are removed with non-maximum suppression, and
	    the remaining boxes are drawn through ``drawPred``.

	    Args:
	        frame: image array; only ``frame.shape[0]`` (height) and
	            ``frame.shape[1]`` (width) are read here.
	        outs: iterable of network outputs, each an iterable of detection rows.
	    """
	    frameHeight, frameWidth = frame.shape[:2]

	    classIds = []
	    confidences = []
	    boxes = []
	    # Scan through all the bounding boxes output from the network and keep only the
	    # ones with high confidence scores. Assign the box's class label as the class with the highest score.
	    for out in outs:
	        for detection in out:
	            scores = detection[5:]
	            classId = np.argmax(scores)
	            confidence = scores[classId]
	            if confidence <= confThreshold:
	                continue
	            center_x = int(detection[0] * frameWidth)
	            center_y = int(detection[1] * frameHeight)
	            width = int(detection[2] * frameWidth)
	            height = int(detection[3] * frameHeight)
	            left = int(center_x - width / 2)
	            top = int(center_y - height / 2)
	            classIds.append(classId)
	            confidences.append(float(confidence))
	            boxes.append([left, top, width, height])

	    # Perform non maximum suppression to eliminate redundant overlapping boxes with
	    # lower confidences.
	    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
	    # The kept indices may come back as an Nx1 array, a flat array, or an
	    # empty tuple; flattening handles all three without indexing each entry.
	    for i in np.asarray(indices).flatten():
	        i = int(i)
	        left, top, width, height = boxes[i]
	        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)

	# Process inputs
	winName = 'Deep learning object detection in OpenCV'
	cv.namedWindow(winName, cv.WINDOW_NORMAL)

	outputFile = "yolo_out_py.avi"
	# Camera input: an HTTP stream that is scanned for JPEG frames.
	url="http://192.168.1.149:9601/stream"
	CAMERA_BUFFRER_SIZE=4096  # bytes requested per read (name kept as-is for compatibility)
	stream=urlopen(url)
	bts=b''

	# NOTE: no video writer is created; outputFile is currently unused.

	try:
	    # Runs until any key is pressed in the OpenCV window.
	    while cv.waitKey(1) < 0:
	        chunk = stream.read(CAMERA_BUFFRER_SIZE)
	        if not chunk:
	            # The stream ended; without this check the loop would spin
	            # forever on empty reads.
	            break
	        bts += chunk

	        jpghead = bts.find(b'\xff\xd8')
	        if jpghead < 0:
	            # No start marker yet. Keep only the last byte (it may be the
	            # first half of a marker split across reads) so the buffer
	            # cannot grow without bound.
	            bts = bts[-1:]
	            continue
	        # Search for the end marker only after the start marker; an end
	        # marker left over from a partial frame would otherwise produce an
	        # empty slice.
	        jpgend = bts.find(b'\xff\xd9', jpghead + 2)
	        if jpgend < 0:
	            # Frame still incomplete: drop the bytes preceding its start.
	            bts = bts[jpghead:]
	            continue

	        jpg = bts[jpghead:jpgend+2]
	        bts = bts[jpgend+2:]
	        img = cv.imdecode(np.frombuffer(jpg, dtype=np.uint8), cv.IMREAD_UNCHANGED)
	        if img is None:
	            # Corrupt or truncated JPEG data; skip it and wait for the next frame.
	            continue

	        # Flip code -1 flips around both axes; the other flips computed by
	        # the original code were never used.
	        frame = cv.flip(img, -1)
	        frame = cv.resize(frame, (1024, 768))
	        blob = cv.dnn.blobFromImage(frame, 1/255, (inpWidth, inpHeight), [0,0,0], 1, crop=False)
	        net.setInput(blob)
	        # Runs the forward pass to get output of the output layers
	        outs = net.forward(getOutputsNames(net))
	        # Remove the bounding boxes with low confidence
	        postprocess(frame, outs)
	        # Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
	        t, _ = net.getPerfProfile()
	        label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
	        cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
	        cv.imshow(winName, frame)
	finally:
	    # Release the HTTP connection and the display window on any exit path.
	    stream.close()
	    cv.destroyAllWindows()