├── misc
│   ├── pyttsx3-espeak.mp3
│   ├── browserServiceJetson.py
│   └── objectDetectionOnJetsonNano.py
├── qrio
│   ├── images
│   │   ├── dogbot
│   │   │   ├── body.png
│   │   │   ├── dog-1.jpg
│   │   │   ├── dog-2.jpg
│   │   │   ├── frame.png
│   │   │   ├── head.png
│   │   │   ├── tail.png
│   │   │   ├── eyebrow.png
│   │   │   ├── fullbody.jpg
│   │   │   ├── left-ear.png
│   │   │   ├── eye-black.png
│   │   │   ├── eye-white.png
│   │   │   └── right-ear.png
│   │   └── girlbot
│   │       ├── face.png
│   │       ├── eye-black.png
│   │       └── eye-white.png
│   ├── eyeCoordMapper.py
│   ├── utilities
│   │   ├── dictionaryMap.py
│   │   ├── jsonFile.py
│   │   ├── fpsCalc.py
│   │   ├── fileSearch.py
│   │   ├── threadManager.py
│   │   ├── affineTransform.py
│   │   ├── transformer.py
│   │   ├── stopwatch.py
│   │   ├── vector.py
│   │   ├── imageResize.py
│   │   └── rectArea.py
│   ├── morphTarget.py
│   ├── animation.py
│   ├── morphTargetCore.py
│   ├── config.py
│   ├── audio.py
│   ├── speech.py
│   ├── morphTargetInterp.py
│   ├── animationCore.py
│   ├── tfutils
│   │   ├── dataset_util.py
│   │   ├── label_map_util.py
│   │   └── visualization_utils.py
│   ├── intent.py
│   ├── robotPart.py
│   ├── puppet.py
│   ├── browserServiceMac.py
│   ├── browserServiceJetson.py
│   ├── robot.py
│   ├── fidgetAnimationController.py
│   ├── objectDetection.py
│   └── brainStateMachine.py
└── README.md

/misc/pyttsx3-espeak.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/misc/pyttsx3-espeak.mp3
--------------------------------------------------------------------------------
/qrio/images/dogbot/body.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/dogbot/body.png
--------------------------------------------------------------------------------
/qrio/images/dogbot/dog-1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/dogbot/dog-1.jpg
--------------------------------------------------------------------------------
/qrio/images/dogbot/dog-2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/dogbot/dog-2.jpg
--------------------------------------------------------------------------------
/qrio/images/dogbot/frame.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/dogbot/frame.png
--------------------------------------------------------------------------------
/qrio/images/dogbot/head.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/dogbot/head.png
--------------------------------------------------------------------------------
/qrio/images/dogbot/tail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/dogbot/tail.png
--------------------------------------------------------------------------------
/qrio/images/girlbot/face.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/girlbot/face.png
--------------------------------------------------------------------------------
/qrio/images/dogbot/eyebrow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/dogbot/eyebrow.png
--------------------------------------------------------------------------------
/qrio/images/dogbot/fullbody.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/dogbot/fullbody.jpg
--------------------------------------------------------------------------------
/qrio/images/dogbot/left-ear.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/dogbot/left-ear.png
--------------------------------------------------------------------------------
/qrio/images/dogbot/eye-black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/dogbot/eye-black.png
--------------------------------------------------------------------------------
/qrio/images/dogbot/eye-white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/dogbot/eye-white.png
--------------------------------------------------------------------------------
/qrio/images/dogbot/right-ear.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/dogbot/right-ear.png
--------------------------------------------------------------------------------
/qrio/images/girlbot/eye-black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/girlbot/eye-black.png
--------------------------------------------------------------------------------
/qrio/images/girlbot/eye-white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msubzero2000/Qrio-public/HEAD/qrio/images/girlbot/eye-white.png
--------------------------------------------------------------------------------
/qrio/eyeCoordMapper.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from utilities.vector import Vector
 4 | 
 5 | 
 6 | class EyeCoordMapper(object):
 7 | 
 8 |     def __init__(self):
 9 |         pass
10 | 
11 |     def transform(self, objCoord):
12 |         # Calculate the puppet eye coordinate (-0.5 to 0.5) needed to look at the object at objCoord
13 |         eyeCoord = objCoord.subtract(Vector(0.5, 0.5))
14 | 
15 |         return eyeCoord
16 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Qrio-public
2 | 
3 | This is the code repository for the Qrio project, an AI-driven bot which can recognise human faces and toys. Qrio can speak and search YouTubeKids.com for a relevant video about the identified toy. 
4 | 5 | Follow the full blog here 6 | 7 | https://towardsdatascience.com/building-a-bot-that-plays-videos-for-my-toddler-597330d0005e 8 | 9 | Agustinus (Gus) Nalwan - https://www.linkedin.com/in/agustinus-nalwan 10 | -------------------------------------------------------------------------------- /qrio/utilities/dictionaryMap.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | class DictionaryMap(object): 4 | 5 | @staticmethod 6 | def getByPath(dict, path): 7 | subKeys = path.split(".") 8 | curDictValue = dict 9 | 10 | for subKey in subKeys: 11 | if subKey in curDictValue: 12 | curDictValue = curDictValue[subKey] 13 | else: 14 | return None 15 | 16 | return curDictValue 17 | -------------------------------------------------------------------------------- /qrio/utilities/jsonFile.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | class JsonFile(object): 4 | @staticmethod 5 | def jsonFromFile(filePath): 6 | with open(filePath, "r") as handle: 7 | return json.loads(handle.read()) 8 | 9 | @staticmethod 10 | def jsonToFile(filePath, jsonObj): 11 | try: 12 | with open(filePath, "w") as handle: 13 | handle.write(json.dumps(jsonObj, sort_keys=True, indent=4, separators=(',', ': '))) 14 | except Exception as e: 15 | print(e) -------------------------------------------------------------------------------- /qrio/morphTarget.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from utilities.vector import Vector 4 | 5 | 6 | class MorphTarget(object): 7 | 8 | def __init__(self, paramVecAtTarget, posDict): 9 | self._paramVecAtTarget = paramVecAtTarget 10 | self._posDict = posDict 11 | 12 | def getAt(self, name, curParamVec): 13 | if name in self._posDict: 14 | deltaVec = curParamVec.subtract(self._paramVecAtTarget) 15 | dist = max(1, deltaVec.length()) 16 | weight = 1.0 / dist 17 | 18 | return self._posDict[name].scale(weight), weight 19 | 20 | return None, 0.0 21 | -------------------------------------------------------------------------------- /qrio/animation.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class Animation(object): 5 | 6 | def __init__(self, targetVec, speed=0.01): 7 | self._targetVec = targetVec 8 | self._paramT = 0.0 9 | self._speed = speed 10 | self._animating = True 11 | 12 | def update(self): 13 | self._paramT += self._speed 14 | if self._paramT > 1.0: 15 | self._paramT = 1.0 16 | self._animating = False 17 | 18 | # Done animating 19 | return True 20 | 21 | # Still animating 22 | return False 23 | 24 | def paramT(self): 25 | return self._paramT 26 | -------------------------------------------------------------------------------- /qrio/utilities/fpsCalc.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from utilities.stopwatch import Stopwatch 4 | 5 | 6 | class FpsCalc(object): 7 | _MAX_HISTORY = 5 8 | 9 | def __init__(self): 10 | self._history = [] 11 | self._stopWatch = Stopwatch() 12 | 13 | def log(self): 14 | self._history.append(self._stopWatch.get()) 15 | if len(self._history) > self._MAX_HISTORY: 16 | self._history = self._history[-self._MAX_HISTORY:] 17 | 18 | startTime = self._history[0] 19 | stopTime = self._history[-1] 20 | if startTime == stopTime: 21 | fps = 1.0 22 | else: 23 | fps = (1000 * len(self._history)) / (stopTime - startTime) 24 | 25 | return fps 26 | 
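A minimal usage sketch for FpsCalc (illustrative only, not part of the repository). It assumes the code runs from inside the qrio folder so that utilities.fpsCalc is importable; render_frame is a hypothetical placeholder for whatever work happens each frame.

from utilities.fpsCalc import FpsCalc

def run_loop(render_frame, num_frames=100):
    # FpsCalc keeps the last five timestamps from its internal Stopwatch and
    # derives the FPS estimate from the spread between the oldest and newest
    # entries, so calling log() once per rendered frame is enough.
    fpsCalc = FpsCalc()
    for _ in range(num_frames):
        render_frame()  # hypothetical per-frame work
        print("FPS: {0:.1f}".format(fpsCalc.log()))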
-------------------------------------------------------------------------------- /qrio/morphTargetCore.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class MorphTargetCore(object): 5 | 6 | def __init__(self, poseDict, rotDict=None): 7 | self._poseDict = poseDict 8 | self._rotDict = rotDict 9 | 10 | def getWithWeight(self, weight=1.0, name=None): 11 | outPoseDict = {} 12 | outRotDict = {} 13 | 14 | for key, vec in self._poseDict.items(): 15 | if name is None or name in outPoseDict: 16 | outPoseDict[key] = vec.scale(weight) 17 | 18 | if self._rotDict is not None: 19 | for key, val in self._rotDict.items(): 20 | if name is None or name in outRotDict: 21 | outRotDict[key] = val * weight 22 | 23 | return outPoseDict, outRotDict 24 | -------------------------------------------------------------------------------- /qrio/utilities/fileSearch.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | class FileSearch: 4 | 5 | @staticmethod 6 | def collectFilesEndsWithNameRecursively(name, folderPath): 7 | fileList = [] 8 | 9 | for root, dirs, files in os.walk(folderPath): 10 | for file in files: 11 | if name is None or file.endswith(name): 12 | fileList.append(os.path.join(root, file)) 13 | 14 | return fileList 15 | 16 | @staticmethod 17 | def collectFilesEndsWithName(name, folderPath): 18 | fileList = [] 19 | 20 | for file in os.listdir(folderPath): 21 | if name is None or file.endswith(name): 22 | fileList.append(os.path.join(folderPath, file)) 23 | 24 | return fileList 25 | -------------------------------------------------------------------------------- /qrio/utilities/threadManager.py: -------------------------------------------------------------------------------- 1 | import threading 2 | from queue import Queue 3 | 4 | class ThreadManager(object): 5 | 6 | @classmethod 7 | def execute(self, callback, inputList, numThreads, data): 8 | threads = [] 9 | inputQueue = Queue() 10 | resultList = [] 11 | 12 | for input in inputList: 13 | inputQueue.put(input) 14 | 15 | for i in range(numThreads): 16 | inputQueue.put(None) 17 | 18 | for i in range(numThreads): 19 | t = threading.Thread(target=callback, args=(inputQueue, resultList, data)) 20 | t.daemon = False 21 | t.start() 22 | threads.append(t) 23 | 24 | for thread in threads: 25 | thread.join() 26 | 27 | return resultList -------------------------------------------------------------------------------- /qrio/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class Config(object): 5 | 6 | IS_JETSON = False 7 | 8 | if IS_JETSON: 9 | SCREEN_WIDTH = 2000 10 | SCREEN_HEIGHT = 1000 11 | SPRITE_SCALE = 1.0 12 | PLAYBACK_FPS = 5 13 | ENABLE_TF = True 14 | OBJECT_DETECTION = "live" 15 | SLEEP = 0 16 | else: 17 | SCREEN_WIDTH = 720 18 | SCREEN_HEIGHT = 540 19 | SPRITE_SCALE = 0.5 20 | PLAYBACK_FPS = 20 21 | ENABLE_TF = False 22 | OBJECT_DETECTION = "fake" 23 | SLEEP = 0.03 24 | 25 | ENABLE_BROWSER = False 26 | VIDEO_PLAYBACK_TIME = 30 27 | MAX_FPS = 60 28 | 29 | 30 | @classmethod 31 | def frameScale(cls, frames): 32 | return frames * Config.PLAYBACK_FPS / Config.MAX_FPS 33 | 34 | @classmethod 35 | def speedScale(cls, speed): 36 | return min(0.5, speed * Config.MAX_FPS / Config.PLAYBACK_FPS) 37 | -------------------------------------------------------------------------------- /qrio/utilities/affineTransform.py: 
-------------------------------------------------------------------------------- 1 | import copy 2 | import math 3 | from affine import Affine 4 | from utilities.vector import Vector 5 | 6 | class AffineTransform(object): 7 | 8 | def __init__(self, affine=Affine.identity()): 9 | self._affine = affine 10 | 11 | def rotate(self, angle): 12 | self._affine *= Affine.rotation(angle) 13 | 14 | def translate(self, offset): 15 | self._affine *= Affine.translation(offset.x, offset.y) 16 | 17 | def scale(self, scale): 18 | self._affine *= Affine.scale(scale) 19 | 20 | def transform(self, point): 21 | result = self._affine * (point.x, point.y) 22 | return Vector(result[0], result[1]) 23 | 24 | def copy(self): 25 | return AffineTransform(copy.deepcopy(self._affine)) 26 | 27 | def getTranslation(self): 28 | return Vector(self._affine[2], self._affine[5]) 29 | 30 | def getRotation(self): 31 | angle = math.atan2(self._affine[3], self._affine[0]) * 180.0 / math.pi 32 | 33 | return angle 34 | -------------------------------------------------------------------------------- /qrio/utilities/transformer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | from affine import Affine 4 | 5 | from utilities.vector import Vector 6 | 7 | 8 | class Transformer(object): 9 | 10 | def __init__(self, offset=Vector(0, 0), scale=1.0, angle=0.0): 11 | self._pos = offset 12 | self._scale = scale 13 | self._angle = angle 14 | self._affine = Affine.identity() 15 | 16 | def apply(self, offset, scale): 17 | self._affine = self._affine.translation(offset.x, offset.y) 18 | offset = offset.scale(self._scale) 19 | 20 | self._pos = self._pos.add(offset) 21 | self._scale = scale 22 | 23 | return self._pos, self._scale, self._angle 24 | 25 | def copy(self): 26 | return Transformer(self._pos, self._scale, self._angle) 27 | 28 | def offset(self, offset): 29 | self._pos = self._pos.add(offset) 30 | 31 | def transform(self, offset, angle): 32 | self._pos = self._pos.add(offset) 33 | if angle is not None: 34 | self._angle += angle -------------------------------------------------------------------------------- /qrio/utilities/stopwatch.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | class Stopwatch: 4 | 5 | _MILLISECONDS_PER_SECOND = 1000 6 | 7 | def __init__(self): 8 | self._reset() 9 | 10 | def stop(self): 11 | if (self._duration is None): 12 | end = datetime.datetime.now() 13 | delta = end - self.start 14 | self._duration = int(delta.total_seconds() * Stopwatch._MILLISECONDS_PER_SECOND) # only return whole int number, don't need micro seconds 15 | return self._duration 16 | 17 | def restart(self): 18 | ms = self.stop() 19 | self._reset() 20 | return ms 21 | 22 | def _reset(self): 23 | self.start = datetime.datetime.now() 24 | self._duration = None 25 | 26 | def get(self): 27 | if (self._duration is None): 28 | end = datetime.datetime.now() 29 | delta = end - self.start 30 | return int(delta.total_seconds() * Stopwatch._MILLISECONDS_PER_SECOND) 31 | 32 | return 0 33 | 34 | -------------------------------------------------------------------------------- /qrio/utilities/vector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | 4 | class Vector(object): 5 | 6 | def __init__(self, x, y): 7 | self.x = x 8 | self.y = y 9 | 10 | def abs(self): 11 | return Vector(abs(self.x), abs(self.y)) 12 | 13 | def length(self): 14 | return math.sqrt(pow(self.x, 
2) + pow(self.y, 2)) 15 | 16 | def normalise(self): 17 | length = self.length() 18 | return Vector(self.x / length, self.y / length) 19 | 20 | def rotateClockwise(self): 21 | return Vector(self.y, -self.x) 22 | 23 | def rotateAntiClockwise(self): 24 | return Vector(-self.y, self.x) 25 | 26 | def multiply(self, vector): 27 | return Vector(self.x * vector.x, self.y * vector.y) 28 | 29 | def scale(self, length): 30 | return Vector(self.x * length, self.y * length) 31 | 32 | def subtract(self, vec): 33 | return Vector(self.x - vec.x, self.y - vec.y) 34 | 35 | def add(self, vec): 36 | return Vector(self.x + vec.x, self.y + vec.y) 37 | 38 | def rotate(self, angle): 39 | anglePi = angle * math.pi / 180.0 40 | return Vector(self.x * math.cos(anglePi) - self.y * math.sin(anglePi), 41 | self.x * math.sin(anglePi) + self.y * math.cos(anglePi)) 42 | -------------------------------------------------------------------------------- /qrio/audio.py: -------------------------------------------------------------------------------- 1 | 2 | import threading 3 | import time 4 | 5 | from playsound import playsound 6 | from utilities.stopwatch import Stopwatch 7 | 8 | 9 | class StoppableThread(threading.Thread): 10 | """Thread class with a stop() method. The thread itself has to check 11 | regularly for the stopped() condition.""" 12 | 13 | def __init__(self, *args, **kwargs): 14 | super(StoppableThread, self).__init__(*args, **kwargs) 15 | self._stop_event = threading.Event() 16 | 17 | def stop(self): 18 | self._stop_event.set() 19 | 20 | def stopped(self): 21 | return self._stop_event.is_set() 22 | 23 | class Audio(object): 24 | 25 | _AFTER_PLAYING_DURATION = 1 26 | 27 | def __init__(self): 28 | self._thread = None 29 | self._timeSinceStopPlaying = None 30 | 31 | def _play(self, name): 32 | playsound(name) 33 | 34 | def play(self, name): 35 | if self._thread is not None and not self._thread.isAlive(): 36 | self._thread.stop() 37 | 38 | self._thread = None 39 | 40 | self._thread = StoppableThread(target=self._play, args=(name,)) 41 | self._thread.start() 42 | self._timeSinceStopPlaying = None 43 | 44 | def isPlaying(self): 45 | ret = self._thread is not None and self._thread.isAlive() 46 | 47 | if not ret and self._thread is not None and self._timeSinceStopPlaying is None: 48 | self._timeSinceStopPlaying = Stopwatch() 49 | 50 | return ret 51 | 52 | def isAfterPlaying(self): 53 | if self._thread is None: 54 | return False 55 | 56 | if not self._thread.isAlive(): 57 | if self._timeSinceStopPlaying is None: 58 | self._timeSinceStopPlaying = Stopwatch() 59 | return True 60 | else: 61 | return self._timeSinceStopPlaying.get() / 1000 < self._AFTER_PLAYING_DURATION 62 | 63 | return False 64 | -------------------------------------------------------------------------------- /qrio/speech.py: -------------------------------------------------------------------------------- 1 | import os 2 | import boto3 3 | import hashlib 4 | 5 | from audio import Audio 6 | from utilities.fileSearch import FileSearch 7 | 8 | 9 | class Speech(object): 10 | _YOUR_AWS_KEY = "" # Enter your aws key here 11 | _YOUR_AWS_SECRET = "" # Enter your aws secret here 12 | 13 | def __init__(self, audioFolder): 14 | self._audio = Audio() 15 | self._audioFolder = audioFolder 16 | audioFilePathList = FileSearch.collectFilesEndsWithNameRecursively(".ogg", audioFolder) 17 | 18 | self._cache = {} 19 | for path in audioFilePathList: 20 | fileName = path.split("/")[-1].split(".")[0] 21 | self._cache[fileName] = path 22 | 23 | self._pollyClient = boto3.Session( 
24 | aws_access_key_id=self._YOUR_AWS_KEY, 25 | aws_secret_access_key=self._YOUR_AWS_SECRET, 26 | region_name='ap-southeast-2').client('polly') 27 | 28 | def speak(self, text): 29 | if len(self._YOUR_AWS_KEY) == 0 or len(self._YOUR_AWS_SECRET) == 0: 30 | return 31 | 32 | hashObject = hashlib.sha1(text.encode()) 33 | hash = hashObject.hexdigest() 34 | 35 | if hash in self._cache: 36 | audioFilePath = self._cache[hash] 37 | else: 38 | audioFilePath = self._tts(text, hash) 39 | 40 | self._cache[hash] = audioFilePath 41 | 42 | self._audio.play(audioFilePath) 43 | 44 | 45 | def isSpeaking(self): 46 | return self._audio.isPlaying() 47 | 48 | def isAfterSpeaking(self): 49 | return self._audio.isAfterPlaying() 50 | 51 | def _tts(self, text, hash): 52 | response = self._pollyClient.synthesize_speech(VoiceId='Ivy', 53 | OutputFormat='mp3', 54 | Text = text) 55 | filePath = os.path.join(self._audioFolder, "{0}.mp3".format(hash)) 56 | file = open(filePath, 'wb') 57 | file.write(response['AudioStream'].read()) 58 | file.close() 59 | 60 | return filePath 61 | -------------------------------------------------------------------------------- /qrio/morphTargetInterp.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from morphTargetCore import MorphTargetCore 4 | from utilities.vector import Vector 5 | 6 | 7 | class MorphTargetInterp(object): 8 | 9 | def __init__(self): 10 | self._morphTargets = [] 11 | 12 | def addMorpTarget(self, morphTarget, vecParamAtTarget): 13 | self._morphTargets.append((morphTarget, vecParamAtTarget)) 14 | 15 | def getPosAtVecParamForQuad(self, vecParam): 16 | finalPos = {} 17 | 18 | lowerLeftPos, _ = self._morphTargets[0][0].getWithWeight(1.0) 19 | lowerRightPos, _ = self._morphTargets[1][0].getWithWeight(1.0) 20 | upperLeftPos, _ = self._morphTargets[3][0].getWithWeight(1.0) 21 | 22 | weightX = (vecParam.x - self._morphTargets[0][1].x) / (self._morphTargets[1][1].x - self._morphTargets[0][1].x) 23 | weightY = (vecParam.y - self._morphTargets[1][1].y) / (self._morphTargets[2][1].y - self._morphTargets[1][1].y) 24 | 25 | for name, vec in lowerLeftPos.items(): 26 | finalVec = Vector(lowerLeftPos[name].x * (1.0 - weightX) + lowerRightPos[name].x * weightX, 27 | lowerLeftPos[name].y * (1.0 - weightY) + upperLeftPos[name].y * weightY) 28 | 29 | finalPos[name] = finalVec 30 | 31 | return finalPos 32 | 33 | def getPosAtVecParam(self, vecParam): 34 | totalWeight = 0 35 | finalPos = {} 36 | 37 | for morphTarget, vecParamAtTarget in self._morphTargets: 38 | curDist = max(0.01, vecParam.subtract(vecParamAtTarget).length()) 39 | weight = 1.0 / curDist 40 | totalWeight += weight 41 | curPos = morphTarget.getWithWeight(weight) 42 | 43 | for name, vec in curPos.items(): 44 | if name in finalPos: 45 | finalPos[name] = finalPos[name].add(vec) 46 | else: 47 | finalPos[name] = vec 48 | 49 | normPos = {} 50 | for name, vec in finalPos.items(): 51 | normPos[name] = vec.scale(1.0 / totalWeight) 52 | 53 | return normPos 54 | -------------------------------------------------------------------------------- /qrio/animationCore.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from morphTargetCore import MorphTargetCore 4 | 5 | 6 | class KeyFrameAnimation(object): 7 | 8 | def __init__(self, morphTargetStart:MorphTargetCore, morphTargetEnd:MorphTargetCore): 9 | self._morphTargetStart = morphTargetStart 10 | self._morphTargetEnd = morphTargetEnd 11 | 12 | def getPosAt(self, t, name=None): 13 | 
outPoseDictStart, outRotDictStart = self._morphTargetStart.getWithWeight(1.0 - t, name) 14 | outPoseDictStop, outRotDictStop = self._morphTargetEnd.getWithWeight(t, name) 15 | 16 | finalOutPoseDict = {} 17 | finalOutRotDict = {} 18 | 19 | for key, vec in outPoseDictStart.items(): 20 | if key in outPoseDictStop: 21 | finalOutPoseDict[key] = outPoseDictStart[key].add(outPoseDictStop[key]) 22 | 23 | for key, vec in outRotDictStart.items(): 24 | if key in outRotDictStop: 25 | finalOutRotDict[key] = outRotDictStart[key] + outRotDictStop[key] 26 | 27 | return finalOutPoseDict, finalOutRotDict 28 | 29 | def getMorphTargetEnd(self): 30 | return self._morphTargetEnd 31 | 32 | def getMorphTargetStart(self): 33 | return self._morphTargetStart 34 | 35 | 36 | class AnimationCore(object): 37 | 38 | def __init__(self, morphTargetStart:MorphTargetCore, morphTargetEnd:MorphTargetCore, speed=0.01, pauseAtStart=0): 39 | self._keyFrameAnimation = KeyFrameAnimation(morphTargetStart, morphTargetEnd) 40 | self._speed = speed 41 | self._curT = 0 42 | self._done = False 43 | self._pauseAtStart = pauseAtStart 44 | 45 | def update(self): 46 | if self._done: 47 | return None, None 48 | 49 | if self._curT >= 1: 50 | # Terminate animation in the next iteration 51 | self._done = True 52 | 53 | poseDict, rotDict = self._keyFrameAnimation.getPosAt(min(1, self._curT)) 54 | 55 | # Don't increment timer if we are still at pause period 56 | if self._pauseAtStart > 0: 57 | self._pauseAtStart -= 1 58 | else: 59 | self._curT += self._speed 60 | 61 | return poseDict, rotDict 62 | 63 | def getMorphTargetEnd(self): 64 | return self._keyFrameAnimation.getMorphTargetEnd() 65 | 66 | def getMorphTargetStart(self): 67 | return self._keyFrameAnimation.getMorphTargetStart() 68 | -------------------------------------------------------------------------------- /qrio/utilities/imageResize.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from PIL import Image 4 | 5 | class ImageResizeMode(object): 6 | STRETCH_TO_FIT = "stretchToFit" # Ignore aspect ratio, just stretch to the target dimension 7 | RESIZE_TO_FIT = "resizeToFit" # Keeping aspect ratio, resize as big as possible but no part of image outside the target dimension 8 | RESIZE_TO_FILL = "resizeToFill" # Keeping aspect ratio, resize as small as possible but making sure target dimension area has been completely filled 9 | 10 | 11 | class ImageResize(object): 12 | 13 | @classmethod 14 | def resizeImageDimension(cls, imgWidth: int, imgHeight: int, targetWidth: int, targetHeight: int, 15 | resizeMode: ImageResizeMode, canResizeUp: bool=True): 16 | if resizeMode == ImageResizeMode.STRETCH_TO_FIT: 17 | newWidth = targetWidth 18 | newHeight = targetHeight 19 | 20 | if not canResizeUp: 21 | if newWidth > imgWidth: 22 | newWidth = imgWidth 23 | if newHeight > imgHeight: 24 | newHeight =imgHeight 25 | 26 | return newWidth, newHeight 27 | else: 28 | sx = imgWidth / targetWidth 29 | sy = imgHeight / targetHeight 30 | 31 | if resizeMode == ImageResizeMode.RESIZE_TO_FIT: 32 | s = sx if sx > sy else sy 33 | else: 34 | s = sx if sx < sy else sy 35 | 36 | if not canResizeUp and s < 1.0: 37 | s = 1.0 38 | 39 | newWidth = int(imgWidth / s) 40 | newHeight = int(imgHeight / s) 41 | 42 | return newWidth, newHeight 43 | 44 | @classmethod 45 | def resizeImage(cls, image: Image, targetWidth: int, targetHeight: int, 46 | resizeMode: ImageResizeMode, canResizeUp: bool=True, addPadding: bool = False): 47 | newWidth, newHeight = 
ImageResize.resizeImageDimension(image.size[0], image.size[1], targetWidth, targetHeight, 48 | resizeMode, canResizeUp) 49 | 50 | newImage = image.resize((int(newWidth), int(newHeight))) 51 | 52 | if addPadding and resizeMode == ImageResizeMode.RESIZE_TO_FIT and (newWidth != targetWidth or newHeight != targetHeight): 53 | extraWidth = targetWidth - newWidth 54 | extraHeight = targetHeight - newHeight 55 | 56 | paddedImage = Image.new('RGB', (targetWidth, targetHeight), (0, 0, 0)) 57 | paddedImage.paste(newImage, (int(extraWidth / 2), int(extraHeight / 2))) 58 | 59 | return paddedImage 60 | 61 | return newImage 62 | 63 | 64 | -------------------------------------------------------------------------------- /qrio/tfutils/dataset_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Utility functions for creating TFRecord data sets.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import tensorflow as tf 23 | 24 | 25 | def int64_feature(value): 26 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 27 | 28 | 29 | def int64_list_feature(value): 30 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) 31 | 32 | 33 | def bytes_feature(value): 34 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 35 | 36 | 37 | def bytes_list_feature(value): 38 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) 39 | 40 | 41 | def float_list_feature(value): 42 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 43 | 44 | 45 | def read_examples_list(path): 46 | """Read list of training or validation examples. 47 | 48 | The file is assumed to contain a single example per line where the first 49 | token in the line is an identifier that allows us to find the image and 50 | annotation xml for that example. 51 | 52 | For example, the line: 53 | xyz 3 54 | would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored). 55 | 56 | Args: 57 | path: absolute path to examples list file. 58 | 59 | Returns: 60 | list of example identifiers (strings). 61 | """ 62 | with tf.gfile.GFile(path) as fid: 63 | lines = fid.readlines() 64 | return [line.strip().split(' ')[0] for line in lines] 65 | 66 | 67 | def recursive_parse_xml_to_dict(xml): 68 | """Recursively parses XML contents to python dict. 69 | 70 | We assume that `object` tags are the only ones that can appear 71 | multiple times at the same level of a tree. 72 | 73 | Args: 74 | xml: xml tree obtained by parsing XML file contents using lxml.etree 75 | 76 | Returns: 77 | Python dictionary holding XML contents. 
78 | """ 79 | if not xml: 80 | return {xml.tag: xml.text} 81 | result = {} 82 | for child in xml: 83 | child_result = recursive_parse_xml_to_dict(child) 84 | if child.tag != 'object': 85 | result[child.tag] = child_result[child.tag] 86 | else: 87 | if child.tag not in result: 88 | result[child.tag] = [] 89 | result[child.tag].append(child_result[child.tag]) 90 | return {xml.tag: result} 91 | -------------------------------------------------------------------------------- /qrio/intent.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | from speech import Speech 5 | from config import Config 6 | 7 | if Config.IS_JETSON: 8 | from browserServiceJetson import BrowserService 9 | else: 10 | from browserServiceMac import BrowserService 11 | 12 | from utilities.stopwatch import Stopwatch 13 | 14 | 15 | class Intent(object): 16 | 17 | def __init__(self): 18 | self._speech = Speech("audio/") 19 | self._startPlayingVideoTime = None 20 | if Config.ENABLE_BROWSER: 21 | print("Initing browser intent") 22 | self._browserService = BrowserService() 23 | 24 | def isBusy(self): 25 | #TODO: Add a bit of delay to keep busy status 2 seconds after talking 26 | return self.isTalking() or self.isAfterTalking() or self.isPlayingVideo() 27 | 28 | def isTalking(self): 29 | return self._speech.isSpeaking() 30 | 31 | def isAfterTalking(self): 32 | return self._speech.isAfterSpeaking() 33 | 34 | def askToComeAndPlay(self): 35 | self._speech.speak("Hi Dexie? do you want to come and play?") 36 | 37 | def askToBringObject(self): 38 | self._speech.speak("Dexie? Do you want to bring me something?") 39 | 40 | def askToBringNewObject(self, oldObjectName): 41 | self._speech.speak("We have just played with {0} already. Why don'y you bring me something else?".format(self._appendObjectNameAbbreviation(oldObjectName))) 42 | 43 | def askToBringAnotherObject(self): 44 | self._speech.speak("Well, that was fun isn't it? Do you want to bring me something else?") 45 | 46 | def _appendObjectNameAbbreviation(self, objectName): 47 | objectName = objectName.lower() 48 | if objectName[0] in {'a', 'i', 'e', 'o', 'u'}: 49 | objectName = "an {0}".format(objectName) 50 | else: 51 | objectName = "a {0}".format(objectName) 52 | 53 | return objectName 54 | 55 | def dontHaveVideo(self, objectName): 56 | self._speech.speak("I am sorry. I cannot find a video about {0}! Do you want to bring me something else?".format( 57 | self._appendObjectNameAbbreviation(objectName))) 58 | 59 | def objectRecognised(self, objectName): 60 | self._speech.speak("Hey, I think that is {0}!".format(self._appendObjectNameAbbreviation(objectName))) 61 | 62 | def playVideo(self, objectName): 63 | ret = True 64 | self._speech.speak("Hang on a second. 
Let me play you a video about {0}!".format(self._appendObjectNameAbbreviation(objectName))) 65 | if Config.ENABLE_BROWSER: 66 | ret = self._browserService.searchAndPlay(objectName) 67 | 68 | if ret: 69 | self._startPlayingVideoTime = Stopwatch() 70 | 71 | return ret 72 | 73 | def isPlayingVideo(self): 74 | if self._startPlayingVideoTime is None: 75 | return False 76 | 77 | elapsedSec = self._startPlayingVideoTime.get() / 1000 78 | 79 | return elapsedSec < Config.VIDEO_PLAYBACK_TIME 80 | 81 | def stopVideo(self): 82 | if Config.ENABLE_BROWSER: 83 | self._browserService.stop() 84 | -------------------------------------------------------------------------------- /qrio/robotPart.py: -------------------------------------------------------------------------------- 1 | import arcade 2 | 3 | from utilities.affineTransform import AffineTransform 4 | from utilities.vector import Vector 5 | from morphTarget import MorphTarget 6 | 7 | 8 | class RobotPart(object): 9 | 10 | def __init__(self, relativeNaturalPos, imagePath, spriteScale=1.0, name=None, scale=1.0, 11 | rotation=0.0): 12 | self._name = name 13 | 14 | if self._name is None: 15 | self._name = imagePath.split("/")[-1].split(".")[0] 16 | 17 | self._relativeNaturalPos = relativeNaturalPos 18 | self._scale = scale 19 | self._rotation = rotation 20 | self._sprite = arcade.Sprite(imagePath, spriteScale) 21 | self._parts = [] 22 | 23 | def spriteList(self): 24 | spriteList = [self._sprite] 25 | 26 | for part in self._parts: 27 | spriteList.extend(part.spriteList()) 28 | 29 | return spriteList 30 | 31 | def _getFinalMorphTargetPos(self, paramVec:Vector, morphTargets:[MorphTarget]): 32 | # paramVec = Vector(0.5, -0.5) 33 | 34 | finalPos = Vector(0, 0) 35 | totalWeight = 0.0 36 | 37 | for target in morphTargets: 38 | curPos, weight = target.getAt(self._name, paramVec) 39 | # curPos = Vector(30.0, 30.0) 40 | # weight = 1.0 41 | 42 | # curPos = target.getAt(self._name, paramVec) 43 | totalWeight += weight 44 | if curPos is not None: 45 | finalPos = finalPos.add(curPos) 46 | 47 | return finalPos.scale(totalWeight) 48 | 49 | def _getPoseAt(self, poseDict): 50 | if self._name in poseDict: 51 | return poseDict[self._name] 52 | 53 | return None 54 | 55 | def _getRotAt(self, rotDict): 56 | if self._name in rotDict: 57 | return rotDict[self._name] 58 | 59 | return None 60 | 61 | def update(self, trans: AffineTransform, poseDict, rotDict): 62 | # Further transform (carried from parent transform) by current natural pose offset 63 | trans.translate(self._relativeNaturalPos) 64 | animOffset = self._getPoseAt(poseDict) 65 | 66 | # Apply animation offset 67 | if animOffset is not None: 68 | trans.translate(animOffset) 69 | 70 | # Apply scale transform 71 | trans.scale(self._scale) 72 | 73 | animRot = self._getRotAt(rotDict) 74 | 75 | # Extract the translation of current transform 76 | pos = trans.getTranslation() 77 | self._sprite.center_x = pos.x 78 | self._sprite.center_y = pos.y 79 | 80 | # Apply rotation base transform 81 | trans.rotate(self._rotation) 82 | 83 | # Apply animation transform 84 | if animRot is not None: 85 | trans.rotate(animRot) 86 | 87 | # Extract the current rotation angle 88 | rotAngle = trans.getRotation() 89 | 90 | self._sprite.angle = rotAngle 91 | 92 | for part in self._parts: 93 | # Propagate the current transform the all children 94 | subTrans = trans.copy() 95 | part.update(subTrans, poseDict, rotDict) 96 | 97 | def appendPart(self, part): 98 | self._parts.append(part) 99 | 
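A minimal sketch of how a RobotPart hierarchy is assembled and updated (illustrative only; the offsets, part names and pose values below are assumptions for the example, not taken from robot.py). Each part stores its natural offset from its parent, and update() walks the tree, composing the parent AffineTransform with the per-part animation offset and rotation looked up by name in poseDict and rotDict, then writing the result into the arcade sprites.

from utilities.affineTransform import AffineTransform
from utilities.vector import Vector
from robotPart import RobotPart

# Assumed example rig: a body with a head attached 20 pixels to its left.
body = RobotPart(Vector(360, 270), "images/dogbot/body.png", 0.5, name="body")
head = RobotPart(Vector(-20, 0), "images/dogbot/head.png", 0.5, name="head")
body.appendPart(head)

# Animation-layer output: per-part offset and rotation keyed by part name.
poseDict = {"head": Vector(0.0, 5.0)}
rotDict = {"head": 2.0}

# Propagate transforms from the root part down to every child sprite.
body.update(AffineTransform(), poseDict, rotDict)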
-------------------------------------------------------------------------------- /qrio/puppet.py: -------------------------------------------------------------------------------- 1 | import arcade 2 | import time 3 | 4 | from config import Config 5 | from robot import Robot 6 | from morphTarget import MorphTarget 7 | from utilities.vector import Vector 8 | from animation import Animation 9 | from objectDetection import ObjectDetectionOffline, ObjectDetection, ObjectDetectionFake 10 | from utilities.fpsCalc import FpsCalc 11 | 12 | 13 | class MyGame(arcade.Window): 14 | """ Main application class. """ 15 | 16 | def __init__(self, width, height): 17 | super().__init__(width, height) 18 | 19 | arcade.set_background_color(arcade.color.WHITE) 20 | self._width = width 21 | self._height = height 22 | self._fpsCalc = FpsCalc() 23 | 24 | def setup(self): 25 | movieFilePath = "/Users/agustinus.nalwan/Desktop/Work/AI/Dev/Personal/Qnabot/code/testVideo/footage-toys.mp4" 26 | objDetectCacheFolder = "/Users/agustinus.nalwan/Desktop/Work/AI/Dev/Personal/Qnabot/code/testVideo/objDetect/footage-toys" 27 | 28 | if Config.OBJECT_DETECTION == "live": 29 | self._objectDetection = ObjectDetection() 30 | elif Config.OBJECT_DETECTION == "fake": 31 | self._objectDetection = ObjectDetectionFake() 32 | elif Config.OBJECT_DETECTION == "offline": 33 | self._objectDetection = ObjectDetectionOffline(movieFilePath=movieFilePath, objDetectCacheFolder=objDetectCacheFolder) 34 | 35 | self._robot = Robot((self._width, self._height)) 36 | 37 | # self._paramVec = Vector(0.0, 0.0) 38 | # self._targetParamVec = Vector(0.0, 0.0) 39 | 40 | #TODO: Testing the eye target animation using a timer animation. Remove this code when we 41 | #already replace with vector containing location of face from object detection 42 | # self._animateParamVectorTo(Vector(0.0, -0.5), 0.05) 43 | 44 | # def _animateParamVectorTo(self, newVector, speed): 45 | # self._animation = Animation(newVector, speed) 46 | 47 | def on_draw(self): 48 | """ Render the screen. """ 49 | arcade.start_render() 50 | # Your drawing code goes here 51 | self._robot.spriteList().draw() 52 | # arcade.glEnable(arcade.GL_TEXTURE_2D) 53 | # arcade.glTexParameteri(arcade.GL_TEXTURE_2D, arcade.GL_TEXTURE_MIN_FILTER, arcade.GL_NEAREST) 54 | # arcade.glTexParameteri(arcade.GL_TEXTURE_2D, arcade.GL_TEXTURE_MAG_FILTER, arcade.GL_NEAREST) 55 | 56 | def update(self, delta_time): 57 | if Config.SLEEP > 0: 58 | time.sleep(Config.SLEEP) 59 | 60 | """ All the logic to move, and the game logic goes here. 
""" 61 | # print(delta_time) 62 | lastFrameCaptured = self._objectDetection.getLastFrameCaptured() 63 | print("FPS: {0:.1f}".format(self._fpsCalc.log())) 64 | self._robot.update(lastFrameCaptured) 65 | # if self._animation.update(): 66 | # self._animateParamVectorTo(Vector(0.0, -0.5), 0.02) 67 | # sound = Sound('speech.ogg') 68 | # playstat=sound.play() 69 | # while True: 70 | # time.sleep(1) 71 | # print(sound.player._players[0].time) 72 | # pass 73 | 74 | def main(): 75 | game = MyGame(Config.SCREEN_WIDTH, Config.SCREEN_HEIGHT) 76 | game.setup() 77 | arcade.run() 78 | 79 | 80 | if __name__ == "__main__": 81 | main() 82 | -------------------------------------------------------------------------------- /qrio/browserServiceMac.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.common.exceptions import TimeoutException 3 | from selenium.webdriver.support.ui import WebDriverWait 4 | from selenium.webdriver.support import expected_conditions as EC 5 | from selenium.webdriver.common.by import By 6 | import time 7 | import urllib 8 | 9 | 10 | class BrowserService(object): 11 | 12 | _CHROME_PATH = '/Users/agustinus.nalwan/Downloads/chromedriver' 13 | _YOUTUBE_KIDS_HOME_URL = "http://youtubekids.com" 14 | _YOUTUBE_KIDS_SEARCH_URL = "https://www.youtubekids.com/search?q={0}&hl=en-GB" 15 | _LOAD_TIMEOUT = 5 16 | 17 | def __init__(self, windowPos=(200, 0, 1080, 960)): 18 | self._setup(windowPos) 19 | 20 | def _setup(self, windowPos): 21 | self._driver = webdriver.Chrome(BrowserService._CHROME_PATH) 22 | self._driver.set_window_position(windowPos[0], windowPos[1]) 23 | self._driver.set_window_size(windowPos[2], windowPos[3]) 24 | 25 | # Open youtubeKids 26 | self._driver.get(BrowserService._YOUTUBE_KIDS_HOME_URL) 27 | self._wait() 28 | self._driver.find_element_by_id('parent-button').click() 29 | self._driver.find_element_by_id('next-button').click() 30 | self._driver.find_element_by_id('onboarding-age-gate-digit-1').send_keys('1') 31 | self._driver.find_element_by_id('onboarding-age-gate-digit-2').send_keys('9') 32 | self._driver.find_element_by_id('onboarding-age-gate-digit-3').send_keys('8') 33 | self._driver.find_element_by_id('onboarding-age-gate-digit-4').send_keys('0') 34 | self._driver.find_element_by_css_selector(".flow-buttons > #submit-button").click() 35 | self._driver.find_element_by_id('show-text-link').click() 36 | self._driver.find_element_by_css_selector('.ytk-kids-flow-text-info-renderer > #next-button').click() 37 | self._driver.find_element_by_id('skip-button').click() 38 | time.sleep(2) 39 | self._driver.find_element_by_css_selector('.ytk-kids-onboarding-parental-notice-page-renderer > #next-button').click() 40 | time.sleep(2) 41 | self._driver.find_element_by_xpath("//img[contains(@src,'https://www.gstatic.com/ytkids/onboarding/content_card_broadway/content_level_age_preschool_normal_564_500.png')]").click() 42 | time.sleep(2) 43 | self._driver.find_element_by_css_selector('#select-link').click() 44 | self._driver.find_element_by_css_selector('#search-on-button').click() 45 | self._driver.find_element_by_css_selector('#done-button').click() 46 | 47 | def _wait(self, elementId='element_id'): 48 | try: 49 | element_present = EC.presence_of_element_located((By.ID, elementId)) 50 | WebDriverWait(self._driver, self._LOAD_TIMEOUT).until(element_present) 51 | except TimeoutException: 52 | print 53 | "Timed out waiting for page to load" 54 | 55 | def searchAndPlay(self, keyword): 56 | keywordEncoded = 
urllib.parse.quote(keyword, safe='') 57 | 58 | self._driver.get(self._YOUTUBE_KIDS_SEARCH_URL.format(keywordEncoded)) 59 | try: 60 | self._wait() 61 | 62 | video = self._driver.find_element_by_class_name('yt-simple-endpoint.ytk-compact-video-renderer') 63 | if video is not None: 64 | video.click() 65 | self._wait() 66 | self._driver.find_element_by_css_selector('#player-fullscreen-button > #icon').click() 67 | 68 | return True 69 | except Exception as ex: 70 | pass 71 | 72 | return False 73 | 74 | def stop(self): 75 | self._driver.find_element_by_css_selector('#player-fullscreen-button > #icon').click() 76 | self._wait() 77 | self._driver.get(self._YOUTUBE_KIDS_HOME_URL) 78 | 79 | # browser = BrowserService() 80 | # time.sleep(3) 81 | # browser.searchAndPlay("bus") 82 | # time.sleep(10) 83 | # browser.stop() 84 | # time.sleep(1000) 85 | 86 | -------------------------------------------------------------------------------- /misc/browserServiceJetson.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.common.exceptions import TimeoutException 3 | from selenium.webdriver.support.ui import WebDriverWait 4 | from selenium.webdriver.support import expected_conditions as EC 5 | from selenium.webdriver.common.by import By 6 | import time 7 | import urllib 8 | 9 | 10 | class BrowserService(object): 11 | 12 | _CHROME_PATH = '/usr/lib/chromium-browser/chromedriver' 13 | _YOUTUBE_KIDS_HOME_URL = "http://youtubekids.com" 14 | _YOUTUBE_KIDS_SEARCH_URL = "https://www.youtubekids.com/search?q={0}&hl=en-GB" 15 | _LOAD_TIMEOUT = 5 16 | 17 | def __init__(self, windowPos=(3000, 0, 1080, 960)): 18 | self._setup(windowPos) 19 | 20 | def _setup(self, windowPos): 21 | print("Setting up Browser!!") 22 | options = webdriver.ChromeOptions() 23 | #options.add_argument('--remote-debugging-port=9515') 24 | #options.add_argument('--no-sandbox') 25 | #options.add_argument('--disable-dev-shm-usage') 26 | options.binary_location = BrowserService._CHROME_PATH 27 | self._driver = webdriver.Chrome(BrowserService._CHROME_PATH) 28 | self._driver.set_window_position(windowPos[0], windowPos[1]) 29 | self._driver.set_window_size(windowPos[2], windowPos[3]) 30 | 31 | # Open youtubeKids 32 | self._driver.get(BrowserService._YOUTUBE_KIDS_HOME_URL) 33 | self._wait() 34 | self._driver.find_element_by_id('parent-button').click() 35 | self._driver.find_element_by_id('next-button').click() 36 | self._driver.find_element_by_id('onboarding-age-gate-digit-1').send_keys('1') 37 | self._driver.find_element_by_id('onboarding-age-gate-digit-2').send_keys('9') 38 | self._driver.find_element_by_id('onboarding-age-gate-digit-3').send_keys('8') 39 | self._driver.find_element_by_id('onboarding-age-gate-digit-4').send_keys('0') 40 | self._driver.find_element_by_css_selector(".flow-buttons > #submit-button").click() 41 | self._driver.find_element_by_id('show-text-link').click() 42 | self._driver.find_element_by_css_selector('.ytk-kids-flow-text-info-renderer > #next-button').click() 43 | self._driver.find_element_by_id('skip-button').click() 44 | time.sleep(2) 45 | self._driver.find_element_by_css_selector('.ytk-kids-onboarding-parental-notice-page-renderer > #next-button').click() 46 | time.sleep(2) 47 | self._driver.find_element_by_xpath("//img[contains(@src,'https://www.gstatic.com/ytkids/onboarding/content_card_broadway/content_level_age_preschool_normal_282_250.png')]").click() 48 | time.sleep(2) 49 | 
self._driver.find_element_by_css_selector('#select-link').click() 50 | self._driver.find_element_by_css_selector('#search-on-button').click() 51 | self._driver.find_element_by_css_selector('#done-button').click() 52 | 53 | def _wait(self, elementId='element_id'): 54 | try: 55 | element_present = EC.presence_of_element_located((By.ID, elementId)) 56 | WebDriverWait(self._driver, self._LOAD_TIMEOUT).until(element_present) 57 | except TimeoutException: 58 | print 59 | "Timed out waiting for page to load" 60 | 61 | def searchAndPlay(self, keyword): 62 | keywordEncoded = urllib.parse.quote(keyword, safe='') 63 | 64 | self._driver.get(self._YOUTUBE_KIDS_SEARCH_URL.format(keywordEncoded)) 65 | try: 66 | self._wait() 67 | 68 | video = self._driver.find_element_by_class_name('yt-simple-endpoint.ytk-compact-video-renderer') 69 | if video is not None: 70 | video.click() 71 | self._wait() 72 | self._driver.find_element_by_css_selector('#player-fullscreen-button > #icon').click() 73 | 74 | return True 75 | except Exception as ex: 76 | pass 77 | 78 | return False 79 | 80 | def stop(self): 81 | self._driver.find_element_by_css_selector('#player-fullscreen-button > #icon').click() 82 | self._wait() 83 | self._driver.get(self._YOUTUBE_KIDS_HOME_URL) 84 | 85 | #browser = BrowserService() 86 | #time.sleep(3) 87 | #browser.searchAndPlay("bus") 88 | #time.sleep(10) 89 | #browser.stop() 90 | #time.sleep(1000) 91 | 92 | -------------------------------------------------------------------------------- /qrio/browserServiceJetson.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.common.exceptions import TimeoutException 3 | from selenium.webdriver.support.ui import WebDriverWait 4 | from selenium.webdriver.support import expected_conditions as EC 5 | from selenium.webdriver.common.by import By 6 | import time 7 | import urllib 8 | 9 | 10 | class BrowserService(object): 11 | 12 | _CHROME_PATH = '/usr/lib/chromium-browser/chromedriver' 13 | _YOUTUBE_KIDS_HOME_URL = "http://youtubekids.com" 14 | _YOUTUBE_KIDS_SEARCH_URL = "https://www.youtubekids.com/search?q={0}&hl=en-GB" 15 | _LOAD_TIMEOUT = 5 16 | 17 | def __init__(self, windowPos=(3000, 0, 1080, 960)): 18 | self._setup(windowPos) 19 | 20 | def _setup(self, windowPos): 21 | print("Setting up Browser!!") 22 | options = webdriver.ChromeOptions() 23 | #options.add_argument('--remote-debugging-port=9515') 24 | #options.add_argument('--no-sandbox') 25 | #options.add_argument('--disable-dev-shm-usage') 26 | options.binary_location = BrowserService._CHROME_PATH 27 | self._driver = webdriver.Chrome(BrowserService._CHROME_PATH) 28 | self._driver.set_window_position(windowPos[0], windowPos[1]) 29 | self._driver.set_window_size(windowPos[2], windowPos[3]) 30 | 31 | # Open youtubeKids 32 | self._driver.get(BrowserService._YOUTUBE_KIDS_HOME_URL) 33 | self._wait() 34 | self._driver.find_element_by_id('parent-button').click() 35 | self._driver.find_element_by_id('next-button').click() 36 | self._driver.find_element_by_id('onboarding-age-gate-digit-1').send_keys('1') 37 | self._driver.find_element_by_id('onboarding-age-gate-digit-2').send_keys('9') 38 | self._driver.find_element_by_id('onboarding-age-gate-digit-3').send_keys('8') 39 | self._driver.find_element_by_id('onboarding-age-gate-digit-4').send_keys('0') 40 | self._driver.find_element_by_css_selector(".flow-buttons > #submit-button").click() 41 | self._driver.find_element_by_id('show-text-link').click() 42 | 
self._driver.find_element_by_css_selector('.ytk-kids-flow-text-info-renderer > #next-button').click() 43 | self._driver.find_element_by_id('skip-button').click() 44 | time.sleep(2) 45 | self._driver.find_element_by_css_selector('.ytk-kids-onboarding-parental-notice-page-renderer > #next-button').click() 46 | time.sleep(2) 47 | self._driver.find_element_by_xpath("//img[contains(@src,'https://www.gstatic.com/ytkids/onboarding/content_card_broadway/content_level_age_preschool_normal_282_250.png')]").click() 48 | time.sleep(2) 49 | self._driver.find_element_by_css_selector('#select-link').click() 50 | self._driver.find_element_by_css_selector('#search-on-button').click() 51 | self._driver.find_element_by_css_selector('#done-button').click() 52 | 53 | def _wait(self, elementId='element_id'): 54 | try: 55 | element_present = EC.presence_of_element_located((By.ID, elementId)) 56 | WebDriverWait(self._driver, self._LOAD_TIMEOUT).until(element_present) 57 | except TimeoutException: 58 | print 59 | "Timed out waiting for page to load" 60 | 61 | def searchAndPlay(self, keyword): 62 | keywordEncoded = urllib.parse.quote(keyword, safe='') 63 | 64 | self._driver.get(self._YOUTUBE_KIDS_SEARCH_URL.format(keywordEncoded)) 65 | try: 66 | self._wait() 67 | 68 | video = self._driver.find_element_by_class_name('yt-simple-endpoint.ytk-compact-video-renderer') 69 | if video is not None: 70 | video.click() 71 | self._wait() 72 | self._driver.find_element_by_css_selector('#player-fullscreen-button > #icon').click() 73 | 74 | return True 75 | except Exception as ex: 76 | pass 77 | 78 | return False 79 | 80 | def stop(self): 81 | self._driver.find_element_by_css_selector('#player-fullscreen-button > #icon').click() 82 | self._wait() 83 | self._driver.get(self._YOUTUBE_KIDS_HOME_URL) 84 | 85 | #browser = BrowserService() 86 | #time.sleep(3) 87 | #browser.searchAndPlay("bus") 88 | #time.sleep(10) 89 | #browser.stop() 90 | #time.sleep(1000) 91 | 92 | -------------------------------------------------------------------------------- /misc/objectDetectionOnJetsonNano.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | import tensorflow as tf 5 | from PIL import Image, ImageDraw 6 | 7 | 8 | from utils import visualization_utils as vis_util 9 | from utils import label_map_util 10 | 11 | IM_WIDTH = 720 12 | IM_HEIGHT = 540 13 | 14 | def gstreamer_pipeline (capture_width=IM_WIDTH, capture_height=IM_HEIGHT, display_width=IM_WIDTH, display_height=IM_HEIGHT, framerate=5, flip_method=0) : 15 | return ('nvarguscamerasrc ! ' 16 | 'video/x-raw(memory:NVMM), ' 17 | 'width=(int)%d, height=(int)%d, ' 18 | 'format=(string)NV12, framerate=(fraction)%d/1 ! ' 19 | 'nvvidconv flip-method=%d ! ' 20 | 'video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! ' 21 | 'videoconvert ! ' 22 | 'video/x-raw, format=(string)BGR ! 
appsink' % (capture_width,capture_height,framerate,flip_method,display_width,display_height)) 23 | 24 | MODEL_NAME = 'toys-4-class-jetsoncam' 25 | LABELS = 'labels.pbtxt' 26 | 27 | NUM_CLASSES = 4 28 | 29 | CWD_PATH = os.getcwd() 30 | 31 | PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, 'frozen_inference_graph.pb') 32 | PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELS) 33 | 34 | label_map = label_map_util.load_labelmap(PATH_TO_LABELS) 35 | categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) 36 | category_index = label_map_util.create_category_index(categories) 37 | 38 | detection_graph = tf.compat.v1.Graph() 39 | with detection_graph.as_default(): 40 | od_graph_def = tf.compat.v1.GraphDef() 41 | with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: 42 | serialized_graph = fid.read() 43 | od_graph_def.ParseFromString(serialized_graph) 44 | tf.import_graph_def(od_graph_def, name='') 45 | 46 | TFSess = tf.compat.v1.Session(graph=detection_graph) 47 | 48 | image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') 49 | detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0') 50 | detection_scores = detection_graph.get_tensor_by_name('detection_scores:0') 51 | detection_classes = detection_graph.get_tensor_by_name('detection_classes:0') 52 | 53 | num_detections = detection_graph.get_tensor_by_name('num_detections:0') 54 | 55 | frame_rate_calc = 1 56 | freq = cv2.getTickFrequency() 57 | font = cv2.FONT_HERSHEY_SIMPLEX 58 | 59 | WIN_NAME = 'Detection' 60 | 61 | labelMapFilePath = os.path.join(CWD_PATH, MODEL_NAME, LABELS.split(".")[0] + ".txt") 62 | 63 | with open(labelMapFilePath, "r") as f: 64 | txt = f.read() 65 | labels = txt.split("\n") 66 | 67 | cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=0), cv2.CAP_GSTREAMER) 68 | width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 69 | height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 70 | print("Video dimension width={0} height={1}".format(width, height)) 71 | 72 | if cap.isOpened(): 73 | window_handle = cv2.namedWindow(WIN_NAME, cv2.WINDOW_AUTOSIZE) 74 | 75 | frameCount = 0 76 | minScore = 0.4 77 | 78 | while cv2.getWindowProperty(WIN_NAME, 0) >= 0: 79 | 80 | t1 = cv2.getTickCount() 81 | 82 | ret_val, frame = cap.read(); 83 | if not ret_val: 84 | break 85 | 86 | frame.setflags(write=1) 87 | frame_expanded = np.expand_dims(frame, axis=0) 88 | 89 | (boxes, scores, classes, num) = TFSess.run( 90 | [detection_boxes, detection_scores, detection_classes, num_detections], 91 | feed_dict={image_tensor: frame_expanded}) 92 | 93 | boxesList = np.squeeze(boxes).tolist() 94 | scores = np.squeeze(scores).tolist() 95 | classes = np.squeeze(classes).astype(np.int32).tolist() 96 | 97 | vis_util.visualize_boxes_and_labels_on_image_array( 98 | frame, 99 | np.atleast_2d(np.squeeze(boxes)), 100 | np.atleast_1d(np.squeeze(classes).astype(np.int32)), 101 | np.atleast_1d(np.squeeze(scores)), 102 | category_index, 103 | use_normalized_coordinates=True, 104 | line_thickness=8, 105 | min_score_thresh=minScore) 106 | 107 | cv2.putText(frame,"FPS: {0:.2f}".format(frame_rate_calc), (30, 50), font, 1, (255, 255, 0), 2, cv2.LINE_AA) 108 | cv2.imshow(WIN_NAME, frame) 109 | 110 | t2 = cv2.getTickCount() 111 | time1 = (t2-t1)/freq 112 | frame_rate_calc = 1/time1 113 | 114 | frameCount+=1 115 | 116 | if cv2.waitKey(1) == ord('q'): 117 | break 118 | 119 | cap.release() 120 | 121 | cv2.destroyAllWindows() 122 | 
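A possible post-processing helper for the detection loop above (illustrative only, not part of the script): it reduces the raw TensorFlow outputs to (label, score, box) tuples above the score threshold, assuming the plain-text labels file lists class names in the same order as the label-map ids starting at 1. Boxes stay in the normalised [ymin, xmin, ymax, xmax] order returned by the TF object detection API.

def filterDetections(boxesList, scores, classes, labels, minScore=0.4):
    # Keep only detections above the threshold, paired with their class label.
    detections = []
    for box, score, classId in zip(boxesList, scores, classes):
        if score >= minScore and 0 < classId <= len(labels):
            detections.append((labels[classId - 1], score, box))
    return detections

# e.g. detections = filterDetections(boxesList, scores, classes, labels, minScore)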
-------------------------------------------------------------------------------- /qrio/utilities/rectArea.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | 4 | from .vector import Vector 5 | 6 | 7 | class RectArea(object): 8 | 9 | @staticmethod 10 | def fromPoints(points): 11 | x1 = None 12 | y1 = None 13 | x2 = None 14 | y2 = None 15 | 16 | for x, y in points: 17 | if x1 is None or x1 > x: 18 | x1 = x 19 | if y1 is None or y1 > y: 20 | y1 = y 21 | if x2 is None or x2 < x: 22 | x2 = x 23 | if y2 is None or y2 < y: 24 | y2 = y 25 | 26 | return RectArea(x1, y1, x2, y2) 27 | 28 | def __init__(self, x1, y1, x2, y2): 29 | if x1 < x2: 30 | self.x1 = x1 31 | self.x2 = x2 32 | else: 33 | self.x1 = x2 34 | self.x2 = x1 35 | 36 | if y1 < y2: 37 | self.y1 = y1 38 | self.y2 = y2 39 | else: 40 | self.y1 = y2 41 | self.y2 = y1 42 | 43 | def union(self, anotherRectArea): 44 | newRectArea = RectArea(min(self.x1, anotherRectArea.x1), min(self.y1, anotherRectArea.y1), 45 | max(self.x2, anotherRectArea.x2), max(self.y2, anotherRectArea.y2)) 46 | 47 | return newRectArea 48 | 49 | def intersect(self, anotherRectArea): 50 | if self.isOverlap(anotherRectArea): 51 | return RectArea(max(self.x1, anotherRectArea.x1), max(self.y1, anotherRectArea.y1), 52 | min(self.x2, anotherRectArea.x2), min(self.y2, anotherRectArea.y2)) 53 | 54 | return None 55 | 56 | def area(self): 57 | return (self.x2 - self.x1) * (self.y2 - self.y1) 58 | 59 | def isOverlap(self, anotherRectArea): 60 | if (anotherRectArea.x1 <= self.x2 and anotherRectArea.y1 <= self.y2 and 61 | anotherRectArea.x2 >= self.x1 and anotherRectArea.y2 >= self.y1): 62 | return True 63 | 64 | return False 65 | 66 | def isPointInside(self, x, y): 67 | return x >= self.x1 and x <= self.x2 and y >= self.y1 and y <= self.y2 68 | 69 | def isInside(self, anotherRectArea): 70 | return anotherRectArea.isPointInside(self.x1, self.y1) and anotherRectArea.isPointInside(self.x2, self.y2) 71 | 72 | def aspectRatio(self): 73 | return abs(self.y2 - self.y1) / abs(self.x2 - self.x1) 74 | 75 | def center(self): 76 | return (self.x1 + self.x2) / 2, (self.y1 + self.y2) / 2 77 | 78 | def length(self): 79 | return self.x2 - self.x1 80 | 81 | def scale(self, scale): 82 | return RectArea(self.x1 * scale, self.y1 * scale, self.x2 * scale, self.y2 * scale) 83 | 84 | # Normalise the coordinate system to 0-1 based on the supplied stageWidth, stageHeight 85 | def normalisedFrom(self, stageWidth, stageHeight): 86 | return RectArea(self.x1 / stageWidth, self.y1 / stageHeight, self.x2 / stageWidth, self.y2 / stageHeight) 87 | 88 | # Normalise the coordinate system back from 0-1 to stageWidth, stageHeight 89 | def normalisedTo(self, stageWidth, stageHeight): 90 | return RectArea(self.x1 * stageWidth, self.y1 * stageHeight, self.x2 * stageWidth, self.y2 * stageHeight) 91 | 92 | def round(self): 93 | return RectArea(int(self.x1), int(self.y1), int(self.x2), int(self.y2)) 94 | 95 | def width(self): 96 | return abs(self.x2 - self.x1) 97 | 98 | def height(self): 99 | return abs(self.y2 - self.y1) 100 | 101 | # Return a grown box by scale (keeping the center of the box the same) 102 | def grow(self, scale): 103 | centerX, centerY = self.center() 104 | newWidth = self.width() * scale 105 | newHeight = self.height() * scale 106 | 107 | return RectArea(centerX - newWidth / 2, centerY - newHeight / 2, centerX + newWidth / 2, centerY + newHeight / 2) 108 | 109 | def offset(self, vector: Vector): 110 | return RectArea(self.x1 + vector.x, 
self.y1 + vector.y, self.x2 + vector.x, self.y2 + vector.y) 111 | 112 | def distanceFromRect(self, anotherRectArea): 113 | ourCenterX, ourCenterY = self.center() 114 | 115 | anotherCenterX, anotherCenterY = anotherRectArea.center() 116 | 117 | return math.sqrt(pow(ourCenterX - anotherCenterX, 2) + pow(ourCenterY - anotherCenterY, 2)) 118 | 119 | def toJson(self): 120 | 121 | return {'x1': self.x1, 122 | 'y1': self.y1, 123 | 'x2': self.x2, 124 | 'y2': self.y2} 125 | 126 | def overlapArea(self, anotherRectArea): 127 | if self.isOverlap(anotherRectArea) is False: 128 | return 0, 0 129 | 130 | xmin = max(self.x1, anotherRectArea.x1) 131 | xmax = min(self.x2, anotherRectArea.x2) 132 | ymin = max(self.y1, anotherRectArea.y1) 133 | ymax = min(self.y2, anotherRectArea.y2) 134 | 135 | area = RectArea(xmin, ymin, xmax, ymax).area() 136 | return area, (area/anotherRectArea.area()) 137 | -------------------------------------------------------------------------------- /qrio/robot.py: -------------------------------------------------------------------------------- 1 | import arcade 2 | import random 3 | 4 | from config import Config 5 | from utilities.vector import Vector 6 | from utilities.affineTransform import AffineTransform 7 | from animation import Animation 8 | from robotPart import RobotPart 9 | from animationCore import AnimationCore 10 | 11 | from morphTargetCore import MorphTargetCore 12 | from fidgetAnimationController import FidgetAnimationController 13 | from morphTargetInterp import MorphTargetInterp 14 | from brainStateMachine import BrainStateMachine 15 | 16 | 17 | class Robot(object): 18 | _MAX_HAS_EYE_TARGET_STATE = 50 # Decide if we should be looking at a person/not based on the last 50 object detection states 19 | 20 | def __init__(self, screenSize): 21 | self._create(screenSize) 22 | 23 | def _create(self, screenSize): 24 | self._spriteList = arcade.SpriteList() 25 | 26 | self._robot = RobotPart(Vector(screenSize[0] / 2, screenSize[1] / 2), "images/dogbot/frame.png", 27 | Config.SPRITE_SCALE, scale=Config.SPRITE_SCALE, name='body-frame') 28 | tail = RobotPart(Vector(255, -45), "images/dogbot/tail.png", Config.SPRITE_SCALE) 29 | self._robot.appendPart(tail) 30 | 31 | self._body = RobotPart(Vector(0, 0), "images/dogbot/body.png", Config.SPRITE_SCALE) 32 | self._robot.appendPart(self._body) 33 | 34 | headFrame = RobotPart(Vector(0, 0), "images/dogbot/frame.png", 1.0) 35 | 36 | self._body.appendPart(headFrame) 37 | leftEar, rightEar = self._createEars() 38 | 39 | headFrame.appendPart(leftEar) 40 | headFrame.appendPart(rightEar) 41 | 42 | head = RobotPart(Vector(-20, 0), "images/dogbot/head.png", Config.SPRITE_SCALE) 43 | headFrame.appendPart(head) 44 | 45 | leftEyeBase = self._createEye(Vector(-85, 320), "left-eye") 46 | head.appendPart(leftEyeBase) 47 | 48 | rightEyeBase = self._createEye(Vector(60, 320), "right-eye") 49 | head.appendPart(rightEyeBase) 50 | 51 | allSprites = self._robot.spriteList() 52 | 53 | for sprite in allSprites: 54 | self._spriteList.append(sprite) 55 | 56 | self._fidgetAnimation = FidgetAnimationController() 57 | 58 | self._createEyeTargetAnimation() 59 | 60 | self._brainState = BrainStateMachine() 61 | 62 | def _createEyeTargetAnimation(self): 63 | self._eyeTargetAnim = MorphTargetInterp() 64 | 65 | blackOffsetLeft = -19 66 | whiteOffsetLeft = blackOffsetLeft * 0.8 67 | blackOffsetRight = 15 68 | whiteOffsetRight = blackOffsetRight * 0.8 69 | 70 | blackOffsetTop = -15 71 | whiteOffsetTop = blackOffsetTop * 0.8 72 | blackOffsetBottom = 10 73 | whiteOffsetBottom 
= blackOffsetBottom * 0.8 74 | 75 | morphTarget = MorphTargetCore({'left-eye/white': Vector(whiteOffsetLeft, whiteOffsetTop), 76 | 'left-eye/black': Vector(blackOffsetLeft, blackOffsetTop), 77 | 'right-eye/white': Vector(whiteOffsetLeft, whiteOffsetTop), 78 | 'right-eye/black': Vector(blackOffsetLeft, blackOffsetTop) 79 | }) 80 | 81 | self._eyeTargetAnim.addMorpTarget(morphTarget, Vector(-0.5, -0.5)) 82 | 83 | morphTarget = MorphTargetCore({'left-eye/white': Vector(whiteOffsetRight, whiteOffsetTop), 84 | 'left-eye/black': Vector(blackOffsetRight, blackOffsetTop), 85 | 'right-eye/white': Vector(whiteOffsetRight, whiteOffsetTop), 86 | 'right-eye/black': Vector(blackOffsetRight, blackOffsetTop) 87 | }) 88 | self._eyeTargetAnim.addMorpTarget(morphTarget, Vector(0.5, -0.5)) 89 | 90 | morphTarget = MorphTargetCore({'left-eye/white': Vector(whiteOffsetRight, whiteOffsetBottom), 91 | 'left-eye/black': Vector(blackOffsetRight, blackOffsetBottom), 92 | 'right-eye/white': Vector(whiteOffsetRight, whiteOffsetBottom), 93 | 'right-eye/black': Vector(blackOffsetRight, blackOffsetBottom) 94 | }) 95 | self._eyeTargetAnim.addMorpTarget(morphTarget, Vector(0.5, 0.5)) 96 | 97 | morphTarget = MorphTargetCore({'left-eye/white': Vector(whiteOffsetLeft, whiteOffsetBottom), 98 | 'left-eye/black': Vector(blackOffsetLeft, blackOffsetBottom), 99 | 'right-eye/white': Vector(whiteOffsetLeft, whiteOffsetBottom), 100 | 'right-eye/black': Vector(blackOffsetLeft, blackOffsetBottom) 101 | }) 102 | self._eyeTargetAnim.addMorpTarget(morphTarget, Vector(-0.5, 0.5)) 103 | 104 | self._hasEyeTargetState = [] 105 | 106 | def _createFace(self): 107 | return arcade.Sprite("images/dogbot/head.png", Config.SPRITE_SCALE) 108 | 109 | def _createEye(self, pos, namePrefix): 110 | eyeBase = RobotPart(pos, "images/dogbot/eye-white.png", Config.SPRITE_SCALE, name=namePrefix + "/white") 111 | eyeBlack = RobotPart(Vector(0, -10), "images/dogbot/eye-black.png", Config.SPRITE_SCALE, name=namePrefix + "/black") 112 | eyeBrow = RobotPart(Vector(-10, 70), "images/dogbot/eyebrow.png", Config.SPRITE_SCALE, name=namePrefix + "/eyebrow") 113 | 114 | eyeBase.appendPart(eyeBlack) 115 | eyeBase.appendPart(eyeBrow) 116 | 117 | return eyeBase 118 | 119 | def _createEars(self): 120 | leftEar = RobotPart(Vector(-180, 369), "images/dogbot/left-ear.png", Config.SPRITE_SCALE, name='left-ear') 121 | rightEar = RobotPart(Vector(192, 370), "images/dogbot/right-ear.png", Config.SPRITE_SCALE, name='right-ear') 122 | 123 | return leftEar, rightEar 124 | 125 | def addMorphTarget(self, morphTarget): 126 | # self._morphTargets.append(morphTarget) 127 | pass 128 | 129 | def spriteList(self): 130 | return self._spriteList 131 | 132 | def _updateAnimation(self): 133 | return self._fidgetAnimation.update() 134 | 135 | def _updateAnimationState(self, eyeTargetVecParam): 136 | self._hasEyeTargetState.append(eyeTargetVecParam is not None) 137 | # Keep the last n state 138 | if len(self._hasEyeTargetState) > self._MAX_HAS_EYE_TARGET_STATE: 139 | self._hasEyeTargetState = self._hasEyeTargetState[-self._MAX_HAS_EYE_TARGET_STATE:] 140 | 141 | # Work out the state 142 | totalHasEyeTarget = 0 143 | for curState in self._hasEyeTargetState: 144 | if curState: 145 | totalHasEyeTarget += 1 146 | 147 | newHaveEyeTarget = False 148 | if totalHasEyeTarget > len(self._hasEyeTargetState) / 2: 149 | newHaveEyeTarget = True 150 | 151 | self._brainState.update(newHaveEyeTarget) 152 | 153 | def update(self, lastFrameCaptured): 154 | self._brainState.update(lastFrameCaptured) 155 | 156 | 
eyeTargetVecParam, weight = self._brainState.getEyeTargetParam() 157 | # eyeTargetVecParam = None 158 | 159 | poseDict, rotDict = self._updateAnimation() 160 | if eyeTargetVecParam is not None: 161 | eyeTargetPoseDict = self._eyeTargetAnim.getPosAtVecParamForQuad(eyeTargetVecParam) 162 | else: 163 | eyeTargetPoseDict = {} 164 | 165 | for name, vec in poseDict.items(): 166 | if name in eyeTargetPoseDict: 167 | if name in poseDict: 168 | poseVec = poseDict[name].scale(1.0 - weight) 169 | eyeTargetVec = eyeTargetPoseDict[name].scale(weight) 170 | poseDict[name] = poseVec.add(eyeTargetVec) 171 | else: 172 | poseDict[name] = eyeTargetPoseDict[name] 173 | 174 | t = AffineTransform() 175 | self._robot.update(t, poseDict, rotDict) 176 | -------------------------------------------------------------------------------- /qrio/fidgetAnimationController.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | from morphTargetCore import MorphTargetCore 5 | from utilities.vector import Vector 6 | from animationCore import AnimationCore 7 | from config import Config 8 | 9 | 10 | class FidgetAnimationController(object): 11 | 12 | _FPS = int(max(1, Config.PLAYBACK_FPS)) 13 | 14 | def __init__(self): 15 | self._animationList = [] 16 | 17 | self._eyeAnimationCtr = 0 18 | self._leftEarAnimationCtr = 0 19 | self._rightEarAnimationCtr = 0 20 | self._headAnimationCtr = 0 21 | self._tailAnimationCtr = 0 22 | 23 | self._animationList.append((None, self._createEyeFidgetAnimation)) 24 | self._animationList.append((None, self._createLeftEarFidgetAnimation)) 25 | self._animationList.append((None, self._createRightEarFidgetAnimation)) 26 | self._animationList.append((None, self._createHeadFidgetAnimation)) 27 | self._animationList.append((None, self._createTailFidgetAnimation)) 28 | 29 | for idx, (anim, createFunc) in enumerate(self._animationList): 30 | anim = createFunc() 31 | self._animationList[idx] = (anim, createFunc) 32 | 33 | def _createEyeFidgetAnimation(self, prevAnim=None): 34 | if prevAnim is not None: 35 | morphTargetAtStart = prevAnim.getMorphTargetEnd() 36 | else: 37 | morphTargetAtStart = MorphTargetCore({ 38 | 'left-eye/white': Vector(0, 0), 39 | 'left-eye/black': Vector(0, 0), 40 | 'right-eye/white': Vector(0, 0), 41 | 'right-eye/black': Vector(0, 0) 42 | }) 43 | 44 | eyeTargetOffsetX = random.randrange(-15, 15) 45 | eyeTargetOffsetY = random.randrange(-15, 5) 46 | 47 | morphTargetAtStop = MorphTargetCore({ 48 | 'left-eye/black': Vector(eyeTargetOffsetX, eyeTargetOffsetY), 49 | 'left-eye/white': Vector(int(eyeTargetOffsetX * 0.8), int(eyeTargetOffsetY * 0.8)), 50 | 'right-eye/black': Vector(eyeTargetOffsetX, eyeTargetOffsetY), 51 | 'right-eye/white': Vector(int(eyeTargetOffsetX * 0.8), 52 | int(eyeTargetOffsetY * 0.8)) 53 | }) 54 | # Pause every x second 55 | pauseAtStart = random.randrange(self._FPS * 2, self._FPS * 4) 56 | self._eyeAnimationCtr += 1 57 | 58 | return AnimationCore(morphTargetAtStart, morphTargetAtStop, Config.speedScale(0.03), pauseAtStart) 59 | 60 | def _createHeadFidgetAnimation(self, prevAnim=None): 61 | if prevAnim is not None: 62 | morphTargetAtStart = prevAnim.getMorphTargetEnd() 63 | else: 64 | morphTargetAtStart = MorphTargetCore({'frame': Vector(0, 0)}, 65 | {'frame': 0}) 66 | 67 | headTargetOffsetY = random.randrange(-5, 4) 68 | headRotationOffset = random.randrange(-10, 10) 69 | morphTargetAtStop = MorphTargetCore({'frame': Vector(0, int(headTargetOffsetY)) 70 | }, 71 | {'frame': headRotationOffset 72 | }) 
73 | # Pause every x second 74 | pauseAtStart = random.randrange(self._FPS * 1, self._FPS * 3) 75 | self._headAnimationCtr += 1 76 | 77 | return AnimationCore(morphTargetAtStart, morphTargetAtStop, Config.speedScale(0.02), pauseAtStart) 78 | 79 | def _createTailFidgetAnimation(self, prevAnim=None): 80 | if prevAnim is not None: 81 | morphTargetAtStart = prevAnim.getMorphTargetEnd() 82 | morphTargetAtStop = prevAnim.getMorphTargetStart() 83 | else: 84 | morphTargetAtStart = MorphTargetCore({'tail': Vector(0, 0)}, 85 | {'tail': 0}) 86 | tailRotationOffset = random.randrange(-30, -20) 87 | morphTargetAtStop = MorphTargetCore({'tail': Vector(0, 0) 88 | }, 89 | {'tail': tailRotationOffset 90 | }) 91 | 92 | pauseAtStart = 0 93 | if self._tailAnimationCtr % 6 == 0: 94 | # Pause every x second 95 | pauseAtStart = random.randrange(self._FPS * 3, self._FPS * 8) 96 | 97 | self._tailAnimationCtr += 1 98 | 99 | return AnimationCore(morphTargetAtStart, morphTargetAtStop, Config.speedScale(0.3), pauseAtStart) 100 | 101 | def _createLeftEarFidgetAnimation(self, prevAnim=None): 102 | # Alternate moving from start to stop 103 | if prevAnim is not None: 104 | morphTargetAtStart = prevAnim.getMorphTargetEnd() 105 | morphTargetAtStop = prevAnim.getMorphTargetStart() 106 | else: 107 | morphTargetAtStart = MorphTargetCore({'left-ear': Vector(0, 0)}, 108 | {'left-ear': 0}) 109 | earTargetOffsetX = random.randrange(-2, 2) 110 | earTargetOffsetY = random.randrange(20, 30) 111 | morphTargetAtStop = MorphTargetCore({'left-ear': Vector(earTargetOffsetX, earTargetOffsetY)}, 112 | {'left-ear': -10}) 113 | 114 | pauseAtStart = 0 115 | if self._leftEarAnimationCtr % 4 == 0: 116 | # Pause every x second 117 | pauseAtStart = random.randrange(self._FPS * 3, self._FPS * 8) 118 | 119 | self._leftEarAnimationCtr += 1 120 | 121 | return AnimationCore(morphTargetAtStart, morphTargetAtStop, Config.speedScale(0.2), pauseAtStart) 122 | 123 | def _createRightEarFidgetAnimation(self, prevAnim=None): 124 | # Alternate moving from start to stop 125 | if prevAnim is not None: 126 | morphTargetAtStart = prevAnim.getMorphTargetEnd() 127 | morphTargetAtStop = prevAnim.getMorphTargetStart() 128 | else: 129 | morphTargetAtStart = MorphTargetCore({'right-ear': Vector(0, 0)}, 130 | {'right-ear': 0}) 131 | earTargetOffsetX = random.randrange(-2, 2) 132 | earTargetOffsetY = random.randrange(20, 30) 133 | morphTargetAtStop = MorphTargetCore({'right-ear': Vector(earTargetOffsetX, earTargetOffsetY)}, 134 | {'right-ear': 10}) 135 | 136 | pauseAtStart = 0 137 | if self._rightEarAnimationCtr % 4 == 0: 138 | # Pause every x second 139 | pauseAtStart = random.randrange(self._FPS * 3, self._FPS * 8) 140 | 141 | self._rightEarAnimationCtr += 1 142 | 143 | return AnimationCore(morphTargetAtStart, morphTargetAtStop, Config.speedScale(0.2), pauseAtStart) 144 | 145 | def update(self): 146 | finalPoseDict = {} 147 | finalRotDict = {} 148 | 149 | for idx, (anim, createFunc) in enumerate(self._animationList): 150 | poseDict, rotDict = anim.update() 151 | 152 | if poseDict is None: 153 | anim = createFunc(anim) 154 | self._animationList[idx] = (anim, createFunc) 155 | poseDict, rotDict = anim.update() 156 | 157 | for name, vec in poseDict.items(): 158 | if name in finalPoseDict: 159 | finalPoseDict[name].add(vec) 160 | else: 161 | finalPoseDict[name] = vec 162 | 163 | for name, vec in rotDict.items(): 164 | if name in finalRotDict: 165 | finalRotDict[name].add(vec) 166 | else: 167 | finalRotDict[name] = vec 168 | 169 | return finalPoseDict, finalRotDict 170 | 
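FidgetAnimationController.update returns per-part offset and rotation dictionaries, and Robot.update (in robot.py above) cross-fades each fidget offset with the eye-target offset using the focus weight supplied by the brain state machine. Here is a minimal sketch of that weighted blend using plain (x, y) tuples rather than the repository's Vector class; the function and variable names are illustrative only.

def blend_offsets(fidget_pose, eye_target_pose, weight):
    # weight = 0.0 keeps the fidget pose; weight = 1.0 follows the eye target.
    # Robot.update does the equivalent with Vector.scale() and Vector.add().
    blended = dict(fidget_pose)
    for name, (tx, ty) in eye_target_pose.items():
        fx, fy = fidget_pose.get(name, (0.0, 0.0))
        blended[name] = (fx * (1.0 - weight) + tx * weight,
                         fy * (1.0 - weight) + ty * weight)
    return blended

# Example: halfway between a small fidget glance and a hard look up-and-left
# (the -19/-15 offsets echo blackOffsetLeft/blackOffsetTop in robot.py).
print(blend_offsets({'left-eye/black': (5.0, -3.0)},
                    {'left-eye/black': (-19.0, -15.0)}, weight=0.5))
# -> {'left-eye/black': (-7.0, -9.0)}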
-------------------------------------------------------------------------------- /qrio/tfutils/label_map_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Label map utility functions.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import logging 22 | 23 | from six.moves import range 24 | import tensorflow as tf 25 | from google.protobuf import text_format 26 | from object_detection.protos import string_int_label_map_pb2 27 | 28 | 29 | def _validate_label_map(label_map): 30 | """Checks if a label map is valid. 31 | 32 | Args: 33 | label_map: StringIntLabelMap to validate. 34 | 35 | Raises: 36 | ValueError: if label map is invalid. 37 | """ 38 | for item in label_map.item: 39 | if item.id < 0: 40 | raise ValueError('Label map ids should be >= 0.') 41 | if (item.id == 0 and item.name != 'background' and 42 | item.display_name != 'background'): 43 | raise ValueError('Label map id 0 is reserved for the background label') 44 | 45 | 46 | def create_category_index(categories): 47 | """Creates dictionary of COCO compatible categories keyed by category id. 48 | 49 | Args: 50 | categories: a list of dicts, each of which has the following keys: 51 | 'id': (required) an integer id uniquely identifying this category. 52 | 'name': (required) string representing category name 53 | e.g., 'cat', 'dog', 'pizza'. 54 | 55 | Returns: 56 | category_index: a dict containing the same entries as categories, but keyed 57 | by the 'id' field of each category. 58 | """ 59 | category_index = {} 60 | for cat in categories: 61 | category_index[cat['id']] = cat 62 | return category_index 63 | 64 | 65 | def get_max_label_map_index(label_map): 66 | """Get maximum index in label map. 67 | 68 | Args: 69 | label_map: a StringIntLabelMapProto 70 | 71 | Returns: 72 | an integer 73 | """ 74 | return max([item.id for item in label_map.item]) 75 | 76 | 77 | def convert_label_map_to_categories(label_map, 78 | max_num_classes, 79 | use_display_name=True): 80 | """Given label map proto returns categories list compatible with eval. 81 | 82 | This function converts label map proto and returns a list of dicts, each of 83 | which has the following keys: 84 | 'id': (required) an integer id uniquely identifying this category. 85 | 'name': (required) string representing category name 86 | e.g., 'cat', 'dog', 'pizza'. 87 | We only allow class into the list if its id-label_id_offset is 88 | between 0 (inclusive) and max_num_classes (exclusive). 89 | If there are several items mapping to the same id in the label map, 90 | we will only keep the first one in the categories list. 91 | 92 | Args: 93 | label_map: a StringIntLabelMapProto or None. 
If None, a default categories 94 | list is created with max_num_classes categories. 95 | max_num_classes: maximum number of (consecutive) label indices to include. 96 | use_display_name: (boolean) choose whether to load 'display_name' field as 97 | category name. If False or if the display_name field does not exist, uses 98 | 'name' field as category names instead. 99 | 100 | Returns: 101 | categories: a list of dictionaries representing all possible categories. 102 | """ 103 | categories = [] 104 | list_of_ids_already_added = [] 105 | if not label_map: 106 | label_id_offset = 1 107 | for class_id in range(max_num_classes): 108 | categories.append({ 109 | 'id': class_id + label_id_offset, 110 | 'name': 'category_{}'.format(class_id + label_id_offset) 111 | }) 112 | return categories 113 | for item in label_map.item: 114 | if not 0 < item.id <= max_num_classes: 115 | logging.info( 116 | 'Ignore item %d since it falls outside of requested ' 117 | 'label range.', item.id) 118 | continue 119 | if use_display_name and item.HasField('display_name'): 120 | name = item.display_name 121 | else: 122 | name = item.name 123 | if item.id not in list_of_ids_already_added: 124 | list_of_ids_already_added.append(item.id) 125 | categories.append({'id': item.id, 'name': name}) 126 | return categories 127 | 128 | 129 | def load_labelmap(path): 130 | """Loads label map proto. 131 | 132 | Args: 133 | path: path to StringIntLabelMap proto text file. 134 | Returns: 135 | a StringIntLabelMapProto 136 | """ 137 | with tf.gfile.GFile(path, 'r') as fid: 138 | label_map_string = fid.read() 139 | label_map = string_int_label_map_pb2.StringIntLabelMap() 140 | try: 141 | text_format.Merge(label_map_string, label_map) 142 | except text_format.ParseError: 143 | label_map.ParseFromString(label_map_string) 144 | _validate_label_map(label_map) 145 | return label_map 146 | 147 | 148 | def get_label_map_dict(label_map_path, 149 | use_display_name=False, 150 | fill_in_gaps_and_background=False): 151 | """Reads a label map and returns a dictionary of label names to id. 152 | 153 | Args: 154 | label_map_path: path to StringIntLabelMap proto text file. 155 | use_display_name: whether to use the label map items' display names as keys. 156 | fill_in_gaps_and_background: whether to fill in gaps and background with 157 | respect to the id field in the proto. The id: 0 is reserved for the 158 | 'background' class and will be added if it is missing. All other missing 159 | ids in range(1, max(id)) will be added with a dummy class name 160 | ("class_") if they are missing. 161 | 162 | Returns: 163 | A dictionary mapping label names to id. 164 | 165 | Raises: 166 | ValueError: if fill_in_gaps_and_background and label_map has non-integer or 167 | negative values. 
168 | """ 169 | label_map = load_labelmap(label_map_path) 170 | label_map_dict = {} 171 | for item in label_map.item: 172 | if use_display_name: 173 | label_map_dict[item.display_name] = item.id 174 | else: 175 | label_map_dict[item.name] = item.id 176 | 177 | if fill_in_gaps_and_background: 178 | values = set(label_map_dict.values()) 179 | 180 | if 0 not in values: 181 | label_map_dict['background'] = 0 182 | if not all(isinstance(value, int) for value in values): 183 | raise ValueError('The values in label map must be integers in order to' 184 | 'fill_in_gaps_and_background.') 185 | if not all(value >= 0 for value in values): 186 | raise ValueError('The values in the label map must be positive.') 187 | 188 | if len(values) != max(values) + 1: 189 | # there are gaps in the labels, fill in gaps. 190 | for value in range(1, max(values)): 191 | if value not in values: 192 | # TODO(rathodv): Add a prefix 'class_' here once the tool to generate 193 | # teacher annotation adds this prefix in the data. 194 | label_map_dict[str(value)] = value 195 | 196 | return label_map_dict 197 | 198 | 199 | def create_categories_from_labelmap(label_map_path, use_display_name=True): 200 | """Reads a label map and returns categories list compatible with eval. 201 | 202 | This function converts label map proto and returns a list of dicts, each of 203 | which has the following keys: 204 | 'id': an integer id uniquely identifying this category. 205 | 'name': string representing category name e.g., 'cat', 'dog'. 206 | 207 | Args: 208 | label_map_path: Path to `StringIntLabelMap` proto text file. 209 | use_display_name: (boolean) choose whether to load 'display_name' field 210 | as category name. If False or if the display_name field does not exist, 211 | uses 'name' field as category names instead. 212 | 213 | Returns: 214 | categories: a list of dictionaries representing all possible categories. 215 | """ 216 | label_map = load_labelmap(label_map_path) 217 | max_num_classes = max(item.id for item in label_map.item) 218 | return convert_label_map_to_categories(label_map, max_num_classes, 219 | use_display_name) 220 | 221 | 222 | def create_category_index_from_labelmap(label_map_path, use_display_name=True): 223 | """Reads a label map and returns a category index. 224 | 225 | Args: 226 | label_map_path: Path to `StringIntLabelMap` proto text file. 227 | use_display_name: (boolean) choose whether to load 'display_name' field 228 | as category name. If False or if the display_name field does not exist, 229 | uses 'name' field as category names instead. 230 | 231 | Returns: 232 | A category index, which is a dictionary that maps integer ids to dicts 233 | containing categories, e.g. 
234 | {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, ...} 235 | """ 236 | categories = create_categories_from_labelmap(label_map_path, use_display_name) 237 | return create_category_index(categories) 238 | 239 | 240 | def create_class_agnostic_category_index(): 241 | """Creates a category index with a single `object` class.""" 242 | return {1: {'id': 1, 'name': 'object'}} 243 | -------------------------------------------------------------------------------- /qrio/objectDetection.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | from config import Config 5 | 6 | if Config.ENABLE_TF: 7 | import tensorflow as tf 8 | from tfutils import visualization_utils as vis_util 9 | from tfutils import label_map_util 10 | 11 | from PIL import Image, ImageDraw 12 | 13 | from utilities.jsonFile import JsonFile 14 | from utilities.rectArea import RectArea 15 | from utilities.vector import Vector 16 | 17 | 18 | def gstreamer_pipeline( 19 | capture_width=720, 20 | capture_height=540, 21 | display_width=360, 22 | display_height=270, 23 | framerate=5, 24 | flip_method=0, 25 | ): 26 | return ( 27 | "nvarguscamerasrc ! " 28 | "video/x-raw(memory:NVMM), " 29 | "width=(int)%d, height=(int)%d, " 30 | "format=(string)NV12, framerate=(fraction)%d/1 ! " 31 | "nvvidconv flip-method=%d ! " 32 | "video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! " 33 | "videoconvert ! " 34 | "video/x-raw, format=(string)BGR ! appsink" 35 | % ( 36 | capture_width, 37 | capture_height, 38 | framerate, 39 | flip_method, 40 | display_width, 41 | display_height, 42 | ) 43 | ) 44 | 45 | class ObjectName(object): 46 | Person = "person" 47 | 48 | 49 | class ObjectCaptured(object): 50 | _MIN_PERSON_LENGTH = 5 / 100 # 5% of screen width 51 | _MIN_OBJ_LENGTH = 5 / 100 # 5% of screen width 52 | 53 | def __init__(self, name, boundingBox, confScore): 54 | self.name = name 55 | self.boundingBox = boundingBox 56 | self.confScore = confScore 57 | 58 | def getEstimatedDistance(self): 59 | # Distance is the inverse of bounding box length 60 | return 1.0 / self.boundingBox.length() 61 | 62 | def getEyeCenter(self): 63 | return Vector(self.boundingBox.center()[0], self.boundingBox.y1 + self.boundingBox.height() * 0.1) 64 | 65 | def isBigEnough(self): 66 | if self.name == ObjectName.Person and self.boundingBox.length() >= self._MIN_PERSON_LENGTH: 67 | return True 68 | 69 | if self.boundingBox.length() >= self._MIN_OBJ_LENGTH: 70 | return True 71 | 72 | return False 73 | 74 | 75 | class FrameCaptured(object): 76 | 77 | def __init__(self): 78 | self._objects = [] 79 | 80 | def addObject(self, objCaptured): 81 | self._objects.append(objCaptured) 82 | 83 | def findExistingObject(self, existingObj, objectName=ObjectName.Person, exclusionNames={}): 84 | for obj in self._objects: 85 | if objectName is not None and obj.name != objectName: 86 | continue 87 | if obj.name in exclusionNames: 88 | continue 89 | if existingObj.boundingBox.isOverlap(obj.boundingBox): 90 | return obj, obj.getEstimatedDistance() 91 | 92 | return None, None 93 | 94 | def findNewObject(self, objectName=ObjectName.Person, exclusionNames={}): 95 | for obj in self._objects: 96 | if objectName is not None and obj.name != objectName: 97 | continue 98 | if obj.name in exclusionNames: 99 | continue 100 | if obj.isBigEnough(): 101 | return obj, obj.getEstimatedDistance() 102 | 103 | return None, None 104 | 105 | 106 | class ObjectDetection(object): 107 | 108 | def __init__(self): 109 | 
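        # Open the Jetson CSI camera through the GStreamer pipeline defined above,
        # create the "Camera" preview window if the capture opened successfully,
        # then load the frozen TensorFlow graph and the label list in _setup().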
self._cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=0), cv2.CAP_GSTREAMER) 110 | self._lastFrameCaptured = None 111 | self._ctr = 0 112 | if self._cap.isOpened(): 113 | print("Successfully open camera source") 114 | self._windowHandle = cv2.namedWindow("Camera", cv2.WINDOW_AUTOSIZE) 115 | 116 | self._setup() 117 | 118 | def _setup(self): 119 | MODEL_NAME = 'toys-4-class-jetsoncam' 120 | LABELS = 'labels.pbtxt' 121 | 122 | CWD_PATH = os.getcwd() 123 | 124 | PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, 'frozen_inference_graph.pb') 125 | 126 | detection_graph = tf.compat.v1.Graph() 127 | with detection_graph.as_default(): 128 | od_graph_def = tf.compat.v1.GraphDef() 129 | with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: 130 | serialized_graph = fid.read() 131 | od_graph_def.ParseFromString(serialized_graph) 132 | tf.import_graph_def(od_graph_def, name='') 133 | 134 | self._TFSess = tf.compat.v1.Session(graph=detection_graph) 135 | 136 | self._image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') 137 | 138 | self._detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0') 139 | 140 | self._detection_scores = detection_graph.get_tensor_by_name('detection_scores:0') 141 | self._detection_classes = detection_graph.get_tensor_by_name('detection_classes:0') 142 | 143 | self._num_detections = detection_graph.get_tensor_by_name('num_detections:0') 144 | 145 | labelMapFilePath = os.path.join(CWD_PATH, MODEL_NAME, LABELS.split(".")[0] + ".txt") 146 | 147 | with open(labelMapFilePath, "r") as f: 148 | txt = f.read() 149 | self._labels = txt.split("\n") 150 | 151 | def _update(self): 152 | minScore = 0.4 153 | ret_val, frame = self._cap.read() 154 | print("Got video frame {0}".format(frame.shape)) 155 | 156 | width = frame.shape[1] 157 | height = frame.shape[0] 158 | 159 | frame.setflags(write=1) 160 | frame_expanded = np.expand_dims(frame, axis=0) 161 | 162 | (boxes, scores, classes, num) = self._TFSess.run( 163 | [self._detection_boxes, self._detection_scores, self._detection_classes, self._num_detections], 164 | feed_dict={self._image_tensor: frame_expanded}) 165 | 166 | boxesList = np.squeeze(boxes).tolist() 167 | scores = np.squeeze(scores).tolist() 168 | classes = np.squeeze(classes).astype(np.int32).tolist() 169 | 170 | objList = [] 171 | 172 | for i, box in enumerate(boxesList): 173 | score = scores[i] 174 | if score < minScore: 175 | continue 176 | 177 | classIdx = classes[i] - 1 178 | label = self._labels[classIdx] 179 | x1 = int(box[1] * width) 180 | y1 = int(box[0] * height) 181 | x2 = int(box[3] * width) 182 | y2 = int(box[2] * height) 183 | objList.append({"class": label, "box": {"x1": x1, "y1": y1, "x2": x2, "y2": y2}, "score": score}) 184 | 185 | self._lastFrameCaptured = FrameCaptured() 186 | 187 | for obj in objList: 188 | x1 = 1.0 - obj['box']['x1'] / width 189 | y1 = 1.0 - obj['box']['y1'] / height 190 | x2 = 1.0 - obj['box']['x2'] / width 191 | y2 = 1.0 - obj['box']['y2'] / height 192 | objName = obj['class'] 193 | if objName == 'face': 194 | objName = ObjectName.Person 195 | 196 | self._lastFrameCaptured.addObject(ObjectCaptured(objName, RectArea(x1, y1, x2, y2), obj['score'])) 197 | 198 | cv2.imshow("Camera", frame) 199 | 200 | def getLastFrameCaptured(self): 201 | self._update() 202 | return self._lastFrameCaptured 203 | 204 | def findExistingObject(self, existingObj): 205 | return self._lastFrameCaptured.findExistingObject(existingObj) 206 | 207 | def findNewObject(self): 208 | return self._lastFrameCaptured.findNewObject() 209 | 210 | 211 
| class ObjectDetectionOffline(object): 212 | 213 | def __init__(self, movieFilePath, objDetectCacheFolder): 214 | self._cap = cv2.VideoCapture(movieFilePath) 215 | self._objDetectCacheFolder = objDetectCacheFolder 216 | self._lastFrameCaptured = None 217 | self._ctr = -1 218 | self._frameCtr = 0 219 | self._speedFactor = 1 220 | if self._cap.isOpened(): 221 | print("Successfully open camera source") 222 | self._windowHandle = cv2.namedWindow("Camera", cv2.WINDOW_AUTOSIZE) 223 | 224 | def _readOfflineCache(self, ctr): 225 | filePath = os.path.join(self._objDetectCacheFolder, "frame-{0:04}.txt".format(ctr)) 226 | jsonObj = JsonFile.jsonFromFile(filePath) 227 | 228 | return jsonObj 229 | 230 | def _update(self): 231 | self._ctr += 1 232 | 233 | ret_val, img = self._cap.read() 234 | print("Got video frame {0}".format(img.shape)) 235 | cv2.imshow("Camera", img) 236 | width = img.shape[1] 237 | height = img.shape[0] 238 | 239 | self._lastFrameCaptured = FrameCaptured() 240 | 241 | jsonObj = self._readOfflineCache(self._frameCtr) 242 | 243 | for obj in jsonObj['objects']: 244 | x1 = obj['box']['x1'] / width 245 | y1 = 1.0 - obj['box']['y1'] / height 246 | x2 = obj['box']['x2'] / width 247 | y2 = 1.0 - obj['box']['y2'] / height 248 | objName = obj['class'] 249 | if objName == 'face': 250 | objName = ObjectName.Person 251 | 252 | self._lastFrameCaptured.addObject(ObjectCaptured(objName, RectArea(x1, y1, x2, y2), obj['score'])) 253 | self._frameCtr += 1 254 | 255 | def getLastFrameCaptured(self): 256 | self._update() 257 | return self._lastFrameCaptured 258 | 259 | def findExistingObject(self, existingObj): 260 | return self._lastFrameCaptured.findExistingObject(existingObj) 261 | 262 | def findNewObject(self): 263 | return self._lastFrameCaptured.findNewObject() 264 | 265 | 266 | class ObjectDetectionFake(object): 267 | 268 | def __init__(self): 269 | self._lastFrameCaptured = None 270 | self._ctr = 0 271 | 272 | def _update(self): 273 | self._lastFrameCaptured = FrameCaptured() 274 | 275 | self._ctr += 1 276 | mod = self._ctr % 2500 277 | 278 | # Simulate seeing a person at these locations 279 | if mod < 250: 280 | self._lastFrameCaptured.addObject(ObjectCaptured(ObjectName.Person, RectArea(0.0, 0.0, 0.1, 0.1), 0.9)) 281 | elif mod < 500: 282 | self._lastFrameCaptured.addObject(ObjectCaptured(ObjectName.Person, RectArea(0.9, 0.9, 1.0, 1.0), 0.9)) 283 | elif mod < 750: 284 | self._lastFrameCaptured.addObject(ObjectCaptured(ObjectName.Person, RectArea(0.9, 0.0, 1.0, 0.1), 0.9)) 285 | elif mod < 1500: 286 | # Simulate seeing a person and a horse 287 | self._lastFrameCaptured.addObject(ObjectCaptured(ObjectName.Person, RectArea(0.0, 0.9, 0.1, 1.0), 0.9)) 288 | self._lastFrameCaptured.addObject(ObjectCaptured("Horse", RectArea(0.0, 0.9, 0.1, 1.0), 0.9)) 289 | elif mod < 1900: 290 | # Simulate seeing a person and a giraffe 291 | self._lastFrameCaptured.addObject(ObjectCaptured(ObjectName.Person, RectArea(0.0, 0.9, 0.1, 1.0), 0.9)) 292 | self._lastFrameCaptured.addObject(ObjectCaptured("Giraffe", RectArea(0.0, 0.9, 0.1, 1.0), 0.9)) 293 | else: 294 | pass 295 | 296 | 297 | def getLastFrameCaptured(self): 298 | self._update() 299 | return self._lastFrameCaptured 300 | 301 | def findExistingObject(self, existingObj): 302 | return self._lastFrameCaptured.findExistingObject(existingObj) 303 | 304 | def findNewObject(self): 305 | return self._lastFrameCaptured.findNewObject() 306 | -------------------------------------------------------------------------------- /qrio/brainStateMachine.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | 3 | from intent import Intent 4 | from eyeCoordMapper import EyeCoordMapper 5 | from objectDetection import ObjectName 6 | from utilities.stopwatch import Stopwatch 7 | from config import Config 8 | 9 | 10 | class BrainState(object): 11 | Idle = "Idle" 12 | Engaging = "Engaging" 13 | Conversing = "Conversing" 14 | ObjectRecognised = "ObjectRecognised" 15 | PlayingVideo = "PlayingVideo" 16 | FinishPlayingVideo = "FinishPlayingVideo" 17 | FailedPlayingVideo = "FailedPlayingVideo" 18 | AskToBringAnotherObject = "AskToBringAnotherObject" 19 | 20 | 21 | class TargetName(object): 22 | Other = "Other" 23 | Yumi = "Yumi" 24 | Gus = "Gus" 25 | Dexie = "Dexie" 26 | 27 | 28 | class TargetDistance(object): 29 | Close = "Close" 30 | Far = "Far" 31 | 32 | 33 | class BrainStateMachine(object): 34 | _MAX_ENGAGED_TARGET_HISTORY = 5 35 | # We don't want to scale the focus_eye_speed excesively so keep it at 0.2 36 | _FOCUS_EYE_SPEED = 0.2 37 | _DISSENGAGED_PATIENCE = Config.frameScale(1000) # If no target for 1000 iterations then totally disengaged 38 | _EYE_MOVEMENT_SMOOTHING_FACTOR = 0.2 # The higher the slower the eye will move to new position 39 | _TIME_TO_FULLY_ENGAGED = Config.frameScale(300) # Start asking to bring object after the start of engagement state 40 | _DELAY_BEFORE_ASKING_REPEAT_QUESTION = Config.frameScale(30) # Ask to bring an object after x second not seeing any 41 | _LOCK_OBJECT_PATIENCE = Config.frameScale(180) # If person is no longer visible for 100 iterations, unlock object 42 | _TIMES_SEEN_TO_CONSIDER_OBJECT_REAL = 3 # An object has to be seen 20/60 frames to be considered as a real object 43 | _NO_EYE_TARGET_PATIENCE = Config.frameScale(120) # If no target to see in x frames start doing eye fidget 44 | 45 | def __init__(self): 46 | self._state = BrainState.Idle 47 | self._currentStateTime = None 48 | self._engagedTargets = [] 49 | self._currentTarget = None 50 | self._engagementWeight = 0.0 51 | self._timeToDissengaged = 0 52 | self._sameObjectWasDetected = False 53 | self._timeEngaged = 0 54 | self._currentTargetFocusWeight = 0.0 55 | self._currentEyeCenter = None 56 | self._currentTargetFocusWeight = 0 57 | self._currentTargetDistance = None 58 | self._currentTargetName = None 59 | self._objectTarget = None 60 | self._targetDistance = 0 61 | self._lockedObjectTarget = None 62 | self._eyeCoordMapper = EyeCoordMapper() 63 | self._isSetupReady = False 64 | self._intent = None 65 | self._startCtr = 0 66 | self._timeToReleaseLockedObject = 0 67 | self._seenObject = None 68 | self._prevSeenObject = None 69 | self._seenSameObjectCtr = 0 70 | self._noEyeTargetTimer = 0 71 | 72 | def _updateObjectTarget(self, lastFrameCaptured): 73 | objectTarget, targetDistance = lastFrameCaptured.findNewObject(objectName=None, exclusionNames={ObjectName.Person}) 74 | 75 | canRecordObject = True 76 | self._sameObjectWasDetected = False 77 | 78 | self._prevSeenObject = self._seenObject 79 | self._seenObject = objectTarget 80 | 81 | realObjectTarget = None 82 | # Only consider an object seen a few times in a row to avoid blip 83 | if self._seenObject is not None and self._prevSeenObject is not None: 84 | if self._seenObject.name == self._prevSeenObject.name: 85 | self._seenSameObjectCtr += 1 86 | if self._seenSameObjectCtr >= self._TIMES_SEEN_TO_CONSIDER_OBJECT_REAL: 87 | realObjectTarget = self._seenObject 88 | else: 89 | self._seenSameObjectCtr = 0 90 | else: 91 | self._seenSameObjectCtr = 0 92 | 93 | # 
Make sure we don't re-recognise the same object after playing the video about this object 94 | if realObjectTarget is not None: 95 | if self._lockedObjectTarget is not None: 96 | if objectTarget.name == self._lockedObjectTarget.name: 97 | canRecordObject = False 98 | self._sameObjectWasDetected = True 99 | 100 | if canRecordObject: 101 | self._objectTarget = realObjectTarget 102 | self._targetDistance = targetDistance 103 | else: 104 | self._objectTarget = None 105 | self._targetDistance = 0 106 | 107 | if self._objectTarget is not None: 108 | print("Detecting {0}".format(self._objectTarget.name)) 109 | 110 | def _updateEyeTarget(self, lastFrameCaptured): 111 | newEngagedTarget, newTargetDistance = self._getPersonToEngage(lastFrameCaptured) 112 | 113 | self._engagedTargets.append((newEngagedTarget, newTargetDistance)) 114 | 115 | if len(self._engagedTargets) >= self._MAX_ENGAGED_TARGET_HISTORY: 116 | self._engagedTargets = self._engagedTargets[-self._MAX_ENGAGED_TARGET_HISTORY:] 117 | 118 | totalHasTarget = 0 119 | targetMinDistance = None 120 | 121 | # Look back through all history to decide if we currently have a target 122 | # This is done to avoid state flickering 123 | for target, targetDistance in self._engagedTargets: 124 | if target is not None: 125 | totalHasTarget += 1 126 | 127 | if targetMinDistance is None or targetDistance < targetMinDistance: 128 | targetMinDistance = targetDistance 129 | 130 | if totalHasTarget > self._MAX_ENGAGED_TARGET_HISTORY / 2: 131 | # We have a target. Pick the target which is always the last in the stack 132 | for i in range(len(self._engagedTargets) - 1, 0, -1): 133 | if self._engagedTargets[i] is not None: 134 | self._lockToTarget(self._engagedTargets[-1]) 135 | break 136 | else: 137 | self._releaseTarget() 138 | 139 | def _updateBrainState(self): 140 | if self._intent.isBusy(): 141 | return 142 | 143 | # Don't change our state if we are currently talking 144 | newState = self._state 145 | 146 | # If have target, set state to engaging 147 | if self._currentTarget is not None: 148 | # If we were engaging and already asked the person to come then we jump into conversing 149 | if self._state == BrainState.Engaging: 150 | self._timeEngaged += 1 151 | 152 | if self._timeEngaged >= self._TIME_TO_FULLY_ENGAGED: 153 | newState = BrainState.Conversing 154 | self._intent.askToBringObject() 155 | elif self._state == BrainState.Idle: 156 | self._timeEngaged = 0 157 | newState = BrainState.Engaging 158 | self._intent.askToComeAndPlay() 159 | elif self._state == BrainState.Conversing: 160 | if self._objectTarget is not None: 161 | newState = BrainState.ObjectRecognised 162 | self._lockedObjectTarget = self._objectTarget 163 | self._timeToReleaseLockedObject = self._LOCK_OBJECT_PATIENCE 164 | self._intent.objectRecognised(self._lockedObjectTarget.name) 165 | else: 166 | if self._currentStateTime is not None and self._currentStateTime.get() / 1000 >= self._DELAY_BEFORE_ASKING_REPEAT_QUESTION: 167 | if self._sameObjectWasDetected and self._lockedObjectTarget is not None: 168 | self._intent.askToBringNewObject(self._lockedObjectTarget.name) 169 | else: 170 | self._intent.askToBringObject() 171 | 172 | newState = BrainState.AskToBringAnotherObject 173 | elif self._state == BrainState.AskToBringAnotherObject: 174 | newState = BrainState.Conversing 175 | elif self._state == BrainState.ObjectRecognised: 176 | newState = BrainState.PlayingVideo 177 | if not self._intent.playVideo(self._lockedObjectTarget.name): 178 | newState = BrainState.FailedPlayingVideo 179 | elif 
self._state == BrainState.FailedPlayingVideo: 180 | self._intent.dontHaveVideo(self._lockedObjectTarget.name) 181 | newState = BrainState.Conversing 182 | elif self._state == BrainState.PlayingVideo: 183 | if not self._intent.isPlayingVideo(): 184 | self._intent.stopVideo() 185 | newState = BrainState.Conversing 186 | self._intent.askToBringAnotherObject() 187 | else: 188 | # Slowly getting more and more disengaged if no target 189 | self._timeToDissengaged -= 1 190 | self._timeToReleaseLockedObject -= 1 191 | 192 | if self._timeToReleaseLockedObject <= 0 and self._state == BrainState.ObjectRecognised: 193 | newState = BrainState.Conversing 194 | self._lockedObjectTarget = None 195 | 196 | if self._timeToDissengaged <= 0: 197 | # Run out of patience, not having a target for a very long time, change to idle state 198 | newState = BrainState.Idle 199 | # Reset this locked object so we can re-recognise again 200 | self._lockedObjectTarget = None 201 | 202 | if self._state != BrainState.Idle and self._currentTarget is not None: 203 | self._timeToDissengaged = self._DISSENGAGED_PATIENCE 204 | 205 | self._setState(newState) 206 | 207 | def _setState(self, newState): 208 | if newState != self._state: 209 | self._currentStateTime = Stopwatch() 210 | 211 | self._state = newState 212 | 213 | def _lockToTarget(self, targetTuple): 214 | self._currentTarget, self._currentTargetDistance = targetTuple 215 | 216 | def _releaseTarget(self): 217 | self._currentTarget = None 218 | self._currentTargetDistance = None 219 | 220 | def _updateEyePosition(self): 221 | if self._currentTarget is not None: 222 | # We need to transform from the object detection coordinate system into puppet eye coordinate system 223 | newEyeCenter = self._eyeCoordMapper.transform(self._currentTarget.getEyeCenter()) 224 | # If we have eye target, then 225 | if self._currentEyeCenter is None: 226 | # If we don't have previous eye position, just update with the new one instantly 227 | self._currentEyeCenter = newEyeCenter 228 | else: 229 | # Else gaze slowly to the new position (using damping factor) so the eye do not flicker when the 230 | # detected person bounding box shifted/flicker 231 | curEyeCenter = self._currentEyeCenter.scale(self._EYE_MOVEMENT_SMOOTHING_FACTOR) 232 | newEyeCenter = newEyeCenter.scale(1.0 - self._EYE_MOVEMENT_SMOOTHING_FACTOR) 233 | 234 | self._currentEyeCenter = curEyeCenter.add(newEyeCenter) 235 | 236 | # As we have target, we slowly increasing eye target weight to overtake the eye fidget movement 237 | self._currentTargetFocusWeight = min(1.0, self._currentTargetFocusWeight + self._FOCUS_EYE_SPEED) 238 | self._noEyeTargetTimer = 0 239 | else: 240 | self._noEyeTargetTimer += 1 241 | if self._noEyeTargetTimer > self._NO_EYE_TARGET_PATIENCE: 242 | # No eye target, we slowly decreasing eye target weight to let eye fidget movement to take priority 243 | self._currentTargetFocusWeight = max(0.0, self._currentTargetFocusWeight - self._FOCUS_EYE_SPEED) 244 | 245 | # If we are totally disengaged or idle, reset eye center to None 246 | if self._currentTargetFocusWeight <= 0.0 or self._state == BrainState.Idle: 247 | self._currentEyeCenter = None 248 | 249 | def update(self, lastFrameCaptured): 250 | if self._isSetupReady: 251 | print("Setup ready!") 252 | self._updateEyeTarget(lastFrameCaptured) 253 | self._updateObjectTarget(lastFrameCaptured) 254 | self._updateBrainState() 255 | self._updateEyePosition() 256 | else: 257 | self._startCtr += 1 258 | maxScale = Config.frameScale(100) 259 | print("StartCtr {0} aiming 
{1}".format(self._startCtr, maxScale)) 260 | if self._startCtr > maxScale: 261 | self._intent = Intent() 262 | self._isSetupReady = True 263 | 264 | 265 | def _getPersonToEngage(self, lastFrameCaptured): 266 | targetToEngaged = None 267 | targetDistance = None 268 | # Find human in the lastFrameCaptured and if we have already an engaged target, stick to that as much as possible 269 | # so we don't alternate between person very frequently if both are close to each other 270 | if self._currentTarget is not None: 271 | # Find this target first if still there in the lastFrameCaptured 272 | targetToEngaged, targetDistance = lastFrameCaptured.findExistingObject(self._currentTarget) 273 | 274 | # Cannot find existing target to engage, try to find a new one 275 | if targetToEngaged is None and lastFrameCaptured is not None: 276 | targetToEngaged, targetDistance = lastFrameCaptured.findNewObject() 277 | 278 | return targetToEngaged, targetDistance 279 | 280 | def getState(self): 281 | return self.state 282 | 283 | def getEyeTargetParam(self): 284 | if self._isSetupReady: 285 | return self._currentEyeCenter, self._currentTargetFocusWeight 286 | return None, None 287 | -------------------------------------------------------------------------------- /qrio/tfutils/visualization_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """A set of functions that are used for visualization. 17 | 18 | These functions often receive an image, perform some visualization on the image. 19 | The functions do not return a value, instead they modify the image itself. 20 | 21 | """ 22 | from __future__ import absolute_import 23 | from __future__ import division 24 | from __future__ import print_function 25 | 26 | import abc 27 | import collections 28 | # Set headless-friendly backend. 
29 | import matplotlib; matplotlib.use('Agg') # pylint: disable=multiple-statements 30 | import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top 31 | import numpy as np 32 | import PIL.Image as Image 33 | import PIL.ImageColor as ImageColor 34 | import PIL.ImageDraw as ImageDraw 35 | import PIL.ImageFont as ImageFont 36 | import six 37 | from six.moves import range 38 | from six.moves import zip 39 | import tensorflow as tf 40 | 41 | from object_detection.core import standard_fields as fields 42 | from object_detection.utils import shape_utils 43 | 44 | _TITLE_LEFT_MARGIN = 10 45 | _TITLE_TOP_MARGIN = 10 46 | STANDARD_COLORS = [ 47 | 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 48 | 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 49 | 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 50 | 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 51 | 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 52 | 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 53 | 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 54 | 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', 55 | 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 56 | 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 57 | 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 58 | 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', 59 | 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 60 | 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 61 | 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 62 | 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 63 | 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 64 | 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 65 | 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', 66 | 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', 67 | 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', 68 | 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', 69 | 'WhiteSmoke', 'Yellow', 'YellowGreen' 70 | ] 71 | 72 | 73 | def _get_multiplier_for_color_randomness(): 74 | """Returns a multiplier to get semi-random colors from successive indices. 75 | 76 | This function computes a prime number, p, in the range [2, 17] that: 77 | - is closest to len(STANDARD_COLORS) / 10 78 | - does not divide len(STANDARD_COLORS) 79 | 80 | If no prime numbers in that range satisfy the constraints, p is returned as 1. 81 | 82 | Once p is established, it can be used as a multiplier to select 83 | non-consecutive colors from STANDARD_COLORS: 84 | colors = [(p * i) % len(STANDARD_COLORS) for i in range(20)] 85 | """ 86 | num_colors = len(STANDARD_COLORS) 87 | prime_candidates = [5, 7, 11, 13, 17] 88 | 89 | # Remove all prime candidates that divide the number of colors. 90 | prime_candidates = [p for p in prime_candidates if num_colors % p] 91 | if not prime_candidates: 92 | return 1 93 | 94 | # Return the closest prime number to num_colors / 10. 95 | abs_distance = [np.abs(num_colors / 10. 
- p) for p in prime_candidates] 96 | num_candidates = len(abs_distance) 97 | inds = [i for _, i in sorted(zip(abs_distance, range(num_candidates)))] 98 | return prime_candidates[inds[0]] 99 | 100 | 101 | def save_image_array_as_png(image, output_path): 102 | """Saves an image (represented as a numpy array) to PNG. 103 | 104 | Args: 105 | image: a numpy array with shape [height, width, 3]. 106 | output_path: path to which image should be written. 107 | """ 108 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 109 | with tf.gfile.Open(output_path, 'w') as fid: 110 | image_pil.save(fid, 'PNG') 111 | 112 | 113 | def encode_image_array_as_png_str(image): 114 | """Encodes a numpy array into a PNG string. 115 | 116 | Args: 117 | image: a numpy array with shape [height, width, 3]. 118 | 119 | Returns: 120 | PNG encoded image string. 121 | """ 122 | image_pil = Image.fromarray(np.uint8(image)) 123 | output = six.BytesIO() 124 | image_pil.save(output, format='PNG') 125 | png_string = output.getvalue() 126 | output.close() 127 | return png_string 128 | 129 | 130 | def draw_bounding_box_on_image_array(image, 131 | ymin, 132 | xmin, 133 | ymax, 134 | xmax, 135 | color='red', 136 | thickness=4, 137 | display_str_list=(), 138 | use_normalized_coordinates=True): 139 | """Adds a bounding box to an image (numpy array). 140 | 141 | Bounding box coordinates can be specified in either absolute (pixel) or 142 | normalized coordinates by setting the use_normalized_coordinates argument. 143 | 144 | Args: 145 | image: a numpy array with shape [height, width, 3]. 146 | ymin: ymin of bounding box. 147 | xmin: xmin of bounding box. 148 | ymax: ymax of bounding box. 149 | xmax: xmax of bounding box. 150 | color: color to draw bounding box. Default is red. 151 | thickness: line thickness. Default value is 4. 152 | display_str_list: list of strings to display in box 153 | (each to be shown on its own line). 154 | use_normalized_coordinates: If True (default), treat coordinates 155 | ymin, xmin, ymax, xmax as relative to the image. Otherwise treat 156 | coordinates as absolute. 157 | """ 158 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 159 | draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color, 160 | thickness, display_str_list, 161 | use_normalized_coordinates) 162 | np.copyto(image, np.array(image_pil)) 163 | 164 | 165 | def draw_bounding_box_on_image(image, 166 | ymin, 167 | xmin, 168 | ymax, 169 | xmax, 170 | color='red', 171 | thickness=4, 172 | display_str_list=(), 173 | use_normalized_coordinates=True): 174 | """Adds a bounding box to an image. 175 | 176 | Bounding box coordinates can be specified in either absolute (pixel) or 177 | normalized coordinates by setting the use_normalized_coordinates argument. 178 | 179 | Each string in display_str_list is displayed on a separate line above the 180 | bounding box in black text on a rectangle filled with the input 'color'. 181 | If the top of the bounding box extends to the edge of the image, the strings 182 | are displayed below the bounding box. 183 | 184 | Args: 185 | image: a PIL.Image object. 186 | ymin: ymin of bounding box. 187 | xmin: xmin of bounding box. 188 | ymax: ymax of bounding box. 189 | xmax: xmax of bounding box. 190 | color: color to draw bounding box. Default is red. 191 | thickness: line thickness. Default value is 4. 192 | display_str_list: list of strings to display in box 193 | (each to be shown on its own line). 
194 | use_normalized_coordinates: If True (default), treat coordinates 195 | ymin, xmin, ymax, xmax as relative to the image. Otherwise treat 196 | coordinates as absolute. 197 | """ 198 | draw = ImageDraw.Draw(image) 199 | im_width, im_height = image.size 200 | if use_normalized_coordinates: 201 | (left, right, top, bottom) = (xmin * im_width, xmax * im_width, 202 | ymin * im_height, ymax * im_height) 203 | else: 204 | (left, right, top, bottom) = (xmin, xmax, ymin, ymax) 205 | draw.line([(left, top), (left, bottom), (right, bottom), 206 | (right, top), (left, top)], width=thickness, fill=color) 207 | try: 208 | font = ImageFont.truetype('arial.ttf', 50) 209 | except IOError: 210 | font = ImageFont.load_default() 211 | 212 | # If the total height of the display strings added to the top of the bounding 213 | # box exceeds the top of the image, stack the strings below the bounding box 214 | # instead of above. 215 | display_str_heights = [font.getsize(ds)[1] for ds in display_str_list] 216 | # Each display_str has a top and bottom margin of 0.05x. 217 | total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights) 218 | 219 | if top > total_display_str_height: 220 | text_bottom = top 221 | else: 222 | text_bottom = bottom + total_display_str_height 223 | # Reverse list and print from bottom to top. 224 | for display_str in display_str_list[::-1]: 225 | text_width, text_height = font.getsize(display_str) 226 | margin = np.ceil(0.05 * text_height) 227 | draw.rectangle( 228 | [(left, text_bottom - text_height - 2 * margin), (left + text_width, 229 | text_bottom)], 230 | fill=color) 231 | draw.text( 232 | (left + margin, text_bottom - text_height - margin), 233 | display_str, 234 | fill='black', 235 | font=font) 236 | text_bottom -= text_height - 2 * margin 237 | 238 | 239 | def draw_bounding_boxes_on_image_array(image, 240 | boxes, 241 | color='red', 242 | thickness=4, 243 | display_str_list_list=()): 244 | """Draws bounding boxes on image (numpy array). 245 | 246 | Args: 247 | image: a numpy array object. 248 | boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). 249 | The coordinates are in normalized format between [0, 1]. 250 | color: color to draw bounding box. Default is red. 251 | thickness: line thickness. Default value is 4. 252 | display_str_list_list: list of list of strings. 253 | a list of strings for each bounding box. 254 | The reason to pass a list of strings for a 255 | bounding box is that it might contain 256 | multiple labels. 257 | 258 | Raises: 259 | ValueError: if boxes is not a [N, 4] array 260 | """ 261 | image_pil = Image.fromarray(image) 262 | draw_bounding_boxes_on_image(image_pil, boxes, color, thickness, 263 | display_str_list_list) 264 | np.copyto(image, np.array(image_pil)) 265 | 266 | 267 | def draw_bounding_boxes_on_image(image, 268 | boxes, 269 | color='red', 270 | thickness=4, 271 | display_str_list_list=()): 272 | """Draws bounding boxes on image. 273 | 274 | Args: 275 | image: a PIL.Image object. 276 | boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). 277 | The coordinates are in normalized format between [0, 1]. 278 | color: color to draw bounding box. Default is red. 279 | thickness: line thickness. Default value is 4. 280 | display_str_list_list: list of list of strings. 281 | a list of strings for each bounding box. 282 | The reason to pass a list of strings for a 283 | bounding box is that it might contain 284 | multiple labels. 
285 | 286 | Raises: 287 | ValueError: if boxes is not a [N, 4] array 288 | """ 289 | boxes_shape = boxes.shape 290 | if not boxes_shape: 291 | return 292 | if len(boxes_shape) != 2 or boxes_shape[1] != 4: 293 | raise ValueError('Input must be of size [N, 4]') 294 | for i in range(boxes_shape[0]): 295 | display_str_list = () 296 | if display_str_list_list: 297 | display_str_list = display_str_list_list[i] 298 | draw_bounding_box_on_image(image, boxes[i, 0], boxes[i, 1], boxes[i, 2], 299 | boxes[i, 3], color, thickness, display_str_list) 300 | 301 | 302 | def create_visualization_fn(category_index, include_masks=False, 303 | include_keypoints=False, include_track_ids=False, 304 | **kwargs): 305 | """Constructs a visualization function that can be wrapped in a py_func. 306 | 307 | py_funcs only accept positional arguments. This function returns a suitable 308 | function with the correct positional argument mapping. The positional 309 | arguments in order are: 310 | 0: image 311 | 1: boxes 312 | 2: classes 313 | 3: scores 314 | [4-6]: masks (optional) 315 | [4-6]: keypoints (optional) 316 | [4-6]: track_ids (optional) 317 | 318 | -- Example 1 -- 319 | vis_only_masks_fn = create_visualization_fn(category_index, 320 | include_masks=True, include_keypoints=False, include_track_ids=False, 321 | **kwargs) 322 | image = tf.py_func(vis_only_masks_fn, 323 | inp=[image, boxes, classes, scores, masks], 324 | Tout=tf.uint8) 325 | 326 | -- Example 2 -- 327 | vis_masks_and_track_ids_fn = create_visualization_fn(category_index, 328 | include_masks=True, include_keypoints=False, include_track_ids=True, 329 | **kwargs) 330 | image = tf.py_func(vis_masks_and_track_ids_fn, 331 | inp=[image, boxes, classes, scores, masks, track_ids], 332 | Tout=tf.uint8) 333 | 334 | Args: 335 | category_index: a dict that maps integer ids to category dicts. e.g. 336 | {1: {1: 'dog'}, 2: {2: 'cat'}, ...} 337 | include_masks: Whether masks should be expected as a positional argument in 338 | the returned function. 339 | include_keypoints: Whether keypoints should be expected as a positional 340 | argument in the returned function. 341 | include_track_ids: Whether track ids should be expected as a positional 342 | argument in the returned function. 343 | **kwargs: Additional kwargs that will be passed to 344 | visualize_boxes_and_labels_on_image_array. 345 | 346 | Returns: 347 | Returns a function that only takes tensors as positional arguments. 348 | """ 349 | 350 | def visualization_py_func_fn(*args): 351 | """Visualization function that can be wrapped in a tf.py_func. 352 | 353 | Args: 354 | *args: First 4 positional arguments must be: 355 | image - uint8 numpy array with shape (img_height, img_width, 3). 356 | boxes - a numpy array of shape [N, 4]. 357 | classes - a numpy array of shape [N]. 358 | scores - a numpy array of shape [N] or None. 359 | -- Optional positional arguments -- 360 | instance_masks - a numpy array of shape [N, image_height, image_width]. 361 | keypoints - a numpy array of shape [N, num_keypoints, 2]. 362 | track_ids - a numpy array of shape [N] with unique track ids. 363 | 364 | Returns: 365 | uint8 numpy array with shape (img_height, img_width, 3) with overlaid 366 | boxes. 367 | """ 368 | image = args[0] 369 | boxes = args[1] 370 | classes = args[2] 371 | scores = args[3] 372 | masks = keypoints = track_ids = None 373 | pos_arg_ptr = 4 # Positional argument for first optional tensor (masks). 
374 | if include_masks: 375 | masks = args[pos_arg_ptr] 376 | pos_arg_ptr += 1 377 | if include_keypoints: 378 | keypoints = args[pos_arg_ptr] 379 | pos_arg_ptr += 1 380 | if include_track_ids: 381 | track_ids = args[pos_arg_ptr] 382 | 383 | return visualize_boxes_and_labels_on_image_array( 384 | image, 385 | boxes, 386 | classes, 387 | scores, 388 | category_index=category_index, 389 | instance_masks=masks, 390 | keypoints=keypoints, 391 | track_ids=track_ids, 392 | **kwargs) 393 | return visualization_py_func_fn 394 | 395 | 396 | def _resize_original_image(image, image_shape): 397 | image = tf.expand_dims(image, 0) 398 | image = tf.image.resize_images( 399 | image, 400 | image_shape, 401 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, 402 | align_corners=True) 403 | return tf.cast(tf.squeeze(image, 0), tf.uint8) 404 | 405 | 406 | def draw_bounding_boxes_on_image_tensors(images, 407 | boxes, 408 | classes, 409 | scores, 410 | category_index, 411 | original_image_spatial_shape=None, 412 | true_image_shape=None, 413 | instance_masks=None, 414 | keypoints=None, 415 | track_ids=None, 416 | max_boxes_to_draw=20, 417 | min_score_thresh=0.2, 418 | use_normalized_coordinates=True): 419 | """Draws bounding boxes, masks, and keypoints on batch of image tensors. 420 | 421 | Args: 422 | images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional 423 | channels will be ignored. If C = 1, then we convert the images to RGB 424 | images. 425 | boxes: [N, max_detections, 4] float32 tensor of detection boxes. 426 | classes: [N, max_detections] int tensor of detection classes. Note that 427 | classes are 1-indexed. 428 | scores: [N, max_detections] float32 tensor of detection scores. 429 | category_index: a dict that maps integer ids to category dicts. e.g. 430 | {1: {1: 'dog'}, 2: {2: 'cat'}, ...} 431 | original_image_spatial_shape: [N, 2] tensor containing the spatial size of 432 | the original image. 433 | true_image_shape: [N, 3] tensor containing the spatial size of unpadded 434 | original_image. 435 | instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with 436 | instance masks. 437 | keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2] 438 | with keypoints. 439 | track_ids: [N, max_detections] int32 tensor of unique track ids (i.e. 440 | instance ids for each object). If provided, the color-coding of boxes is 441 | dictated by these ids, and not classes. 442 | max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20. 443 | min_score_thresh: Minimum score threshold for visualization. Default 0.2. 444 | use_normalized_coordinates: Whether to assume boxes and keypoints are in 445 | normalized coordinates (as opposed to absolute coordinates). 446 | Default is True. 447 | 448 | Returns: 449 | 4D image tensor of type uint8, with boxes drawn on top. 450 | """ 451 | # Additional channels are being ignored.
452 | if images.shape[3] > 3: 453 | images = images[:, :, :, 0:3] 454 | elif images.shape[3] == 1: 455 | images = tf.image.grayscale_to_rgb(images) 456 | visualization_keyword_args = { 457 | 'use_normalized_coordinates': use_normalized_coordinates, 458 | 'max_boxes_to_draw': max_boxes_to_draw, 459 | 'min_score_thresh': min_score_thresh, 460 | 'agnostic_mode': False, 461 | 'line_thickness': 4 462 | } 463 | if true_image_shape is None: 464 | true_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 3]) 465 | else: 466 | true_shapes = true_image_shape 467 | if original_image_spatial_shape is None: 468 | original_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 2]) 469 | else: 470 | original_shapes = original_image_spatial_shape 471 | 472 | visualize_boxes_fn = create_visualization_fn( 473 | category_index, 474 | include_masks=instance_masks is not None, 475 | include_keypoints=keypoints is not None, 476 | include_track_ids=track_ids is not None, 477 | **visualization_keyword_args) 478 | 479 | elems = [true_shapes, original_shapes, images, boxes, classes, scores] 480 | if instance_masks is not None: 481 | elems.append(instance_masks) 482 | if keypoints is not None: 483 | elems.append(keypoints) 484 | if track_ids is not None: 485 | elems.append(track_ids) 486 | 487 | def draw_boxes(image_and_detections): 488 | """Draws boxes on image.""" 489 | true_shape = image_and_detections[0] 490 | original_shape = image_and_detections[1] 491 | if true_image_shape is not None: 492 | image = shape_utils.pad_or_clip_nd(image_and_detections[2], 493 | [true_shape[0], true_shape[1], 3]) 494 | if original_image_spatial_shape is not None: 495 | image_and_detections[2] = _resize_original_image(image, original_shape) 496 | 497 | image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections[2:], 498 | tf.uint8) 499 | return image_with_boxes 500 | 501 | images = tf.map_fn(draw_boxes, elems, dtype=tf.uint8, back_prop=False) 502 | return images 503 | 504 | 505 | def draw_side_by_side_evaluation_image(eval_dict, 506 | category_index, 507 | max_boxes_to_draw=20, 508 | min_score_thresh=0.2, 509 | use_normalized_coordinates=True): 510 | """Creates a side-by-side image with detections and groundtruth. 511 | 512 | Bounding boxes (and instance masks, if available) are visualized on both 513 | subimages. 514 | 515 | Args: 516 | eval_dict: The evaluation dictionary returned by 517 | eval_util.result_dict_for_batched_example() or 518 | eval_util.result_dict_for_single_example(). 519 | category_index: A category index (dictionary) produced from a labelmap. 520 | max_boxes_to_draw: The maximum number of boxes to draw for detections. 521 | min_score_thresh: The minimum score threshold for showing detections. 522 | use_normalized_coordinates: Whether to assume boxes and keypoints are in 523 | normalized coordinates (as opposed to absolute coordinates). 524 | Default is True. 525 | 526 | Returns: 527 | A list of [1, H, 2 * W, C] uint8 tensors. The subimage on the left 528 | corresponds to detections, while the subimage on the right corresponds to 529 | groundtruth. 530 | """ 531 | detection_fields = fields.DetectionResultFields() 532 | input_data_fields = fields.InputDataFields() 533 | 534 | images_with_detections_list = [] 535 | 536 | # Add the batch dimension if the eval_dict is for single example.
537 | if len(eval_dict[detection_fields.detection_classes].shape) == 1: 538 | for key in eval_dict: 539 | if key != input_data_fields.original_image: 540 | eval_dict[key] = tf.expand_dims(eval_dict[key], 0) 541 | 542 | for indx in range(eval_dict[input_data_fields.original_image].shape[0]): 543 | instance_masks = None 544 | if detection_fields.detection_masks in eval_dict: 545 | instance_masks = tf.cast( 546 | tf.expand_dims( 547 | eval_dict[detection_fields.detection_masks][indx], axis=0), 548 | tf.uint8) 549 | keypoints = None 550 | if detection_fields.detection_keypoints in eval_dict: 551 | keypoints = tf.expand_dims( 552 | eval_dict[detection_fields.detection_keypoints][indx], axis=0) 553 | groundtruth_instance_masks = None 554 | if input_data_fields.groundtruth_instance_masks in eval_dict: 555 | groundtruth_instance_masks = tf.cast( 556 | tf.expand_dims( 557 | eval_dict[input_data_fields.groundtruth_instance_masks][indx], 558 | axis=0), tf.uint8) 559 | 560 | images_with_detections = draw_bounding_boxes_on_image_tensors( 561 | tf.expand_dims( 562 | eval_dict[input_data_fields.original_image][indx], axis=0), 563 | tf.expand_dims( 564 | eval_dict[detection_fields.detection_boxes][indx], axis=0), 565 | tf.expand_dims( 566 | eval_dict[detection_fields.detection_classes][indx], axis=0), 567 | tf.expand_dims( 568 | eval_dict[detection_fields.detection_scores][indx], axis=0), 569 | category_index, 570 | original_image_spatial_shape=tf.expand_dims( 571 | eval_dict[input_data_fields.original_image_spatial_shape][indx], 572 | axis=0), 573 | true_image_shape=tf.expand_dims( 574 | eval_dict[input_data_fields.true_image_shape][indx], axis=0), 575 | instance_masks=instance_masks, 576 | keypoints=keypoints, 577 | max_boxes_to_draw=max_boxes_to_draw, 578 | min_score_thresh=min_score_thresh, 579 | use_normalized_coordinates=use_normalized_coordinates) 580 | images_with_groundtruth = draw_bounding_boxes_on_image_tensors( 581 | tf.expand_dims( 582 | eval_dict[input_data_fields.original_image][indx], axis=0), 583 | tf.expand_dims( 584 | eval_dict[input_data_fields.groundtruth_boxes][indx], axis=0), 585 | tf.expand_dims( 586 | eval_dict[input_data_fields.groundtruth_classes][indx], axis=0), 587 | tf.expand_dims( 588 | tf.ones_like( 589 | eval_dict[input_data_fields.groundtruth_classes][indx], 590 | dtype=tf.float32), 591 | axis=0), 592 | category_index, 593 | original_image_spatial_shape=tf.expand_dims( 594 | eval_dict[input_data_fields.original_image_spatial_shape][indx], 595 | axis=0), 596 | true_image_shape=tf.expand_dims( 597 | eval_dict[input_data_fields.true_image_shape][indx], axis=0), 598 | instance_masks=groundtruth_instance_masks, 599 | keypoints=None, 600 | max_boxes_to_draw=None, 601 | min_score_thresh=0.0, 602 | use_normalized_coordinates=use_normalized_coordinates) 603 | images_with_detections_list.append( 604 | tf.concat([images_with_detections, images_with_groundtruth], axis=2)) 605 | return images_with_detections_list 606 | 607 | 608 | def draw_keypoints_on_image_array(image, 609 | keypoints, 610 | color='red', 611 | radius=2, 612 | use_normalized_coordinates=True): 613 | """Draws keypoints on an image (numpy array). 614 | 615 | Args: 616 | image: a numpy array with shape [height, width, 3]. 617 | keypoints: a numpy array with shape [num_keypoints, 2]. 618 | color: color to draw the keypoints with. Default is red. 619 | radius: keypoint radius. Default value is 2. 620 | use_normalized_coordinates: if True (default), treat keypoint values as 621 | relative to the image. 
Otherwise treat them as absolute. 622 | """ 623 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 624 | draw_keypoints_on_image(image_pil, keypoints, color, radius, 625 | use_normalized_coordinates) 626 | np.copyto(image, np.array(image_pil)) 627 | 628 | 629 | def draw_keypoints_on_image(image, 630 | keypoints, 631 | color='red', 632 | radius=2, 633 | use_normalized_coordinates=True): 634 | """Draws keypoints on an image. 635 | 636 | Args: 637 | image: a PIL.Image object. 638 | keypoints: a numpy array with shape [num_keypoints, 2]. 639 | color: color to draw the keypoints with. Default is red. 640 | radius: keypoint radius. Default value is 2. 641 | use_normalized_coordinates: if True (default), treat keypoint values as 642 | relative to the image. Otherwise treat them as absolute. 643 | """ 644 | draw = ImageDraw.Draw(image) 645 | im_width, im_height = image.size 646 | keypoints_x = [k[1] for k in keypoints] 647 | keypoints_y = [k[0] for k in keypoints] 648 | if use_normalized_coordinates: 649 | keypoints_x = tuple([im_width * x for x in keypoints_x]) 650 | keypoints_y = tuple([im_height * y for y in keypoints_y]) 651 | for keypoint_x, keypoint_y in zip(keypoints_x, keypoints_y): 652 | draw.ellipse([(keypoint_x - radius, keypoint_y - radius), 653 | (keypoint_x + radius, keypoint_y + radius)], 654 | outline=color, fill=color) 655 | 656 | 657 | def draw_mask_on_image_array(image, mask, color='red', alpha=0.4): 658 | """Draws mask on an image. 659 | 660 | Args: 661 | image: uint8 numpy array with shape (img_height, img_height, 3) 662 | mask: a uint8 numpy array of shape (img_height, img_height) with 663 | values between either 0 or 1. 664 | color: color to draw the keypoints with. Default is red. 665 | alpha: transparency value between 0 and 1. (default: 0.4) 666 | 667 | Raises: 668 | ValueError: On incorrect data type for image or masks. 669 | """ 670 | if image.dtype != np.uint8: 671 | raise ValueError('`image` not of type np.uint8') 672 | if mask.dtype != np.uint8: 673 | raise ValueError('`mask` not of type np.uint8') 674 | if np.any(np.logical_and(mask != 1, mask != 0)): 675 | raise ValueError('`mask` elements should be in [0, 1]') 676 | if image.shape[:2] != mask.shape: 677 | raise ValueError('The image has spatial dimensions %s but the mask has ' 678 | 'dimensions %s' % (image.shape[:2], mask.shape)) 679 | rgb = ImageColor.getrgb(color) 680 | pil_image = Image.fromarray(image) 681 | 682 | solid_color = np.expand_dims( 683 | np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3]) 684 | pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA') 685 | pil_mask = Image.fromarray(np.uint8(255.0*alpha*mask)).convert('L') 686 | pil_image = Image.composite(pil_solid_color, pil_image, pil_mask) 687 | np.copyto(image, np.array(pil_image.convert('RGB'))) 688 | 689 | 690 | def visualize_boxes_and_labels_on_image_array( 691 | image, 692 | boxes, 693 | classes, 694 | scores, 695 | category_index, 696 | instance_masks=None, 697 | instance_boundaries=None, 698 | keypoints=None, 699 | track_ids=None, 700 | use_normalized_coordinates=False, 701 | max_boxes_to_draw=20, 702 | min_score_thresh=.5, 703 | agnostic_mode=False, 704 | line_thickness=4, 705 | groundtruth_box_visualization_color='black', 706 | skip_scores=False, 707 | skip_labels=False, 708 | skip_track_ids=False): 709 | """Overlay labeled boxes on an image with formatted scores and label names. 
710 | 711 | This function groups boxes that correspond to the same location 712 | and creates a display string for each detection and overlays these 713 | on the image. Note that this function modifies the image in place, and returns 714 | that same image. 715 | 716 | Args: 717 | image: uint8 numpy array with shape (img_height, img_width, 3) 718 | boxes: a numpy array of shape [N, 4] 719 | classes: a numpy array of shape [N]. Note that class indices are 1-based, 720 | and match the keys in the label map. 721 | scores: a numpy array of shape [N] or None. If scores=None, then 722 | this function assumes that the boxes to be plotted are groundtruth 723 | boxes and plot all boxes as black with no classes or scores. 724 | category_index: a dict containing category dictionaries (each holding 725 | category index `id` and category name `name`) keyed by category indices. 726 | instance_masks: a numpy array of shape [N, image_height, image_width] with 727 | values ranging between 0 and 1, can be None. 728 | instance_boundaries: a numpy array of shape [N, image_height, image_width] 729 | with values ranging between 0 and 1, can be None. 730 | keypoints: a numpy array of shape [N, num_keypoints, 2], can 731 | be None 732 | track_ids: a numpy array of shape [N] with unique track ids. If provided, 733 | color-coding of boxes will be determined by these ids, and not the class 734 | indices. 735 | use_normalized_coordinates: whether boxes is to be interpreted as 736 | normalized coordinates or not. 737 | max_boxes_to_draw: maximum number of boxes to visualize. If None, draw 738 | all boxes. 739 | min_score_thresh: minimum score threshold for a box to be visualized 740 | agnostic_mode: boolean (default: False) controlling whether to evaluate in 741 | class-agnostic mode or not. This mode will display scores but ignore 742 | classes. 743 | line_thickness: integer (default: 4) controlling line width of the boxes. 744 | groundtruth_box_visualization_color: box color for visualizing groundtruth 745 | boxes 746 | skip_scores: whether to skip score when drawing a single detection 747 | skip_labels: whether to skip label when drawing a single detection 748 | skip_track_ids: whether to skip track id when drawing a single detection 749 | 750 | Returns: 751 | uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes. 752 | """ 753 | # Create a display string (and color) for every box location, group any boxes 754 | # that correspond to the same location. 
755 | box_to_display_str_map = collections.defaultdict(list) 756 | box_to_color_map = collections.defaultdict(str) 757 | box_to_instance_masks_map = {} 758 | box_to_instance_boundaries_map = {} 759 | box_to_keypoints_map = collections.defaultdict(list) 760 | box_to_track_ids_map = {} 761 | if not max_boxes_to_draw: 762 | max_boxes_to_draw = boxes.shape[0] 763 | for i in range(min(max_boxes_to_draw, boxes.shape[0])): 764 | if scores is None or scores[i] > min_score_thresh: 765 | box = tuple(boxes[i].tolist()) 766 | if instance_masks is not None: 767 | box_to_instance_masks_map[box] = instance_masks[i] 768 | if instance_boundaries is not None: 769 | box_to_instance_boundaries_map[box] = instance_boundaries[i] 770 | if keypoints is not None: 771 | box_to_keypoints_map[box].extend(keypoints[i]) 772 | if track_ids is not None: 773 | box_to_track_ids_map[box] = track_ids[i] 774 | if scores is None: 775 | box_to_color_map[box] = groundtruth_box_visualization_color 776 | else: 777 | display_str = '' 778 | if not skip_labels: 779 | if not agnostic_mode: 780 | if classes[i] in six.viewkeys(category_index): 781 | class_name = category_index[classes[i]]['name'] 782 | else: 783 | class_name = 'N/A' 784 | display_str = str(class_name) 785 | if not skip_scores: 786 | if not display_str: 787 | display_str = '{}%'.format(int(100*scores[i])) 788 | else: 789 | display_str = '{}: {}%'.format(display_str, int(100*scores[i])) 790 | if not skip_track_ids and track_ids is not None: 791 | if not display_str: 792 | display_str = 'ID {}'.format(track_ids[i]) 793 | else: 794 | display_str = '{}: ID {}'.format(display_str, track_ids[i]) 795 | box_to_display_str_map[box].append(display_str) 796 | if agnostic_mode: 797 | box_to_color_map[box] = 'DarkOrange' 798 | elif track_ids is not None: 799 | prime_multipler = _get_multiplier_for_color_randomness() 800 | box_to_color_map[box] = STANDARD_COLORS[ 801 | (prime_multipler * track_ids[i]) % len(STANDARD_COLORS)] 802 | else: 803 | box_to_color_map[box] = STANDARD_COLORS[ 804 | classes[i] % len(STANDARD_COLORS)] 805 | 806 | # Draw all boxes onto image. 807 | for box, color in box_to_color_map.items(): 808 | ymin, xmin, ymax, xmax = box 809 | if instance_masks is not None: 810 | draw_mask_on_image_array( 811 | image, 812 | box_to_instance_masks_map[box], 813 | color=color 814 | ) 815 | if instance_boundaries is not None: 816 | draw_mask_on_image_array( 817 | image, 818 | box_to_instance_boundaries_map[box], 819 | color='red', 820 | alpha=1.0 821 | ) 822 | draw_bounding_box_on_image_array( 823 | image, 824 | ymin, 825 | xmin, 826 | ymax, 827 | xmax, 828 | color=color, 829 | thickness=line_thickness, 830 | display_str_list=box_to_display_str_map[box], 831 | use_normalized_coordinates=use_normalized_coordinates) 832 | if keypoints is not None: 833 | draw_keypoints_on_image_array( 834 | image, 835 | box_to_keypoints_map[box], 836 | color=color, 837 | radius=line_thickness / 2, 838 | use_normalized_coordinates=use_normalized_coordinates) 839 | 840 | return image 841 | 842 | 843 | def add_cdf_image_summary(values, name): 844 | """Adds a tf.summary.image for a CDF plot of the values. 845 | 846 | Normalizes `values` such that they sum to 1, plots the cumulative distribution 847 | function and creates a tf image summary. 848 | 849 | Args: 850 | values: a 1-D float32 tensor containing the values. 851 | name: name for the image summary. 
852 | """ 853 | def cdf_plot(values): 854 | """Numpy function to plot CDF.""" 855 | normalized_values = values / np.sum(values) 856 | sorted_values = np.sort(normalized_values) 857 | cumulative_values = np.cumsum(sorted_values) 858 | fraction_of_examples = (np.arange(cumulative_values.size, dtype=np.float32) 859 | / cumulative_values.size) 860 | fig = plt.figure(frameon=False) 861 | ax = fig.add_subplot('111') 862 | ax.plot(fraction_of_examples, cumulative_values) 863 | ax.set_ylabel('cumulative normalized values') 864 | ax.set_xlabel('fraction of examples') 865 | fig.canvas.draw() 866 | width, height = fig.get_size_inches() * fig.get_dpi() 867 | image = np.fromstring(fig.canvas.tostring_rgb(), dtype='uint8').reshape( 868 | 1, int(height), int(width), 3) 869 | return image 870 | cdf_plot = tf.py_func(cdf_plot, [values], tf.uint8) 871 | tf.summary.image(name, cdf_plot) 872 | 873 | 874 | def add_hist_image_summary(values, bins, name): 875 | """Adds a tf.summary.image for a histogram plot of the values. 876 | 877 | Plots the histogram of values and creates a tf image summary. 878 | 879 | Args: 880 | values: a 1-D float32 tensor containing the values. 881 | bins: bin edges which will be directly passed to np.histogram. 882 | name: name for the image summary. 883 | """ 884 | 885 | def hist_plot(values, bins): 886 | """Numpy function to plot hist.""" 887 | fig = plt.figure(frameon=False) 888 | ax = fig.add_subplot('111') 889 | y, x = np.histogram(values, bins=bins) 890 | ax.plot(x[:-1], y) 891 | ax.set_ylabel('count') 892 | ax.set_xlabel('value') 893 | fig.canvas.draw() 894 | width, height = fig.get_size_inches() * fig.get_dpi() 895 | image = np.fromstring( 896 | fig.canvas.tostring_rgb(), dtype='uint8').reshape( 897 | 1, int(height), int(width), 3) 898 | return image 899 | hist_plot = tf.py_func(hist_plot, [values, bins], tf.uint8) 900 | tf.summary.image(name, hist_plot) 901 | 902 | 903 | class EvalMetricOpsVisualization(six.with_metaclass(abc.ABCMeta, object)): 904 | """Abstract base class responsible for visualizations during evaluation. 905 | 906 | Currently, summary images are not run during evaluation. One way to produce 907 | evaluation images in Tensorboard is to provide tf.summary.image strings as 908 | `value_ops` in tf.estimator.EstimatorSpec's `eval_metric_ops`. This class is 909 | responsible for accruing images (with overlaid detections and groundtruth) 910 | and returning a dictionary that can be passed to `eval_metric_ops`. 911 | """ 912 | 913 | def __init__(self, 914 | category_index, 915 | max_examples_to_draw=5, 916 | max_boxes_to_draw=20, 917 | min_score_thresh=0.2, 918 | use_normalized_coordinates=True, 919 | summary_name_prefix='evaluation_image'): 920 | """Creates an EvalMetricOpsVisualization. 921 | 922 | Args: 923 | category_index: A category index (dictionary) produced from a labelmap. 924 | max_examples_to_draw: The maximum number of example summaries to produce. 925 | max_boxes_to_draw: The maximum number of boxes to draw for detections. 926 | min_score_thresh: The minimum score threshold for showing detections. 927 | use_normalized_coordinates: Whether to assume boxes and keypoints are in 928 | normalized coordinates (as opposed to absolute coordinates). 929 | Default is True. 930 | summary_name_prefix: A string prefix for each image summary.
931 | """ 932 | 933 | self._category_index = category_index 934 | self._max_examples_to_draw = max_examples_to_draw 935 | self._max_boxes_to_draw = max_boxes_to_draw 936 | self._min_score_thresh = min_score_thresh 937 | self._use_normalized_coordinates = use_normalized_coordinates 938 | self._summary_name_prefix = summary_name_prefix 939 | self._images = [] 940 | 941 | def clear(self): 942 | self._images = [] 943 | 944 | def add_images(self, images): 945 | """Store a list of images, each with shape [1, H, W, C].""" 946 | if len(self._images) >= self._max_examples_to_draw: 947 | return 948 | 949 | # Store images and clip list if necessary. 950 | self._images.extend(images) 951 | if len(self._images) > self._max_examples_to_draw: 952 | self._images[self._max_examples_to_draw:] = [] 953 | 954 | def get_estimator_eval_metric_ops(self, eval_dict): 955 | """Returns metric ops for use in tf.estimator.EstimatorSpec. 956 | 957 | Args: 958 | eval_dict: A dictionary that holds an image, groundtruth, and detections 959 | for a batched example. Note that we use only the first example for 960 | visualization. See eval_util.result_dict_for_batched_example() for a 961 | convenient method for constructing such a dictionary. The dictionary 962 | contains 963 | fields.InputDataFields.original_image: [batch_size, H, W, 3] image. 964 | fields.InputDataFields.original_image_spatial_shape: [batch_size, 2] 965 | tensor containing the size of the original image. 966 | fields.InputDataFields.true_image_shape: [batch_size, 3] 967 | tensor containing the spatial size of the unpadded original image. 968 | fields.InputDataFields.groundtruth_boxes - [batch_size, num_boxes, 4] 969 | float32 tensor with groundtruth boxes in range [0.0, 1.0]. 970 | fields.InputDataFields.groundtruth_classes - [batch_size, num_boxes] 971 | int64 tensor with 1-indexed groundtruth classes. 972 | fields.InputDataFields.groundtruth_instance_masks - (optional) 973 | [batch_size, num_boxes, H, W] int64 tensor with instance masks. 974 | fields.DetectionResultFields.detection_boxes - [batch_size, 975 | max_num_boxes, 4] float32 tensor with detection boxes in range [0.0, 976 | 1.0]. 977 | fields.DetectionResultFields.detection_classes - [batch_size, 978 | max_num_boxes] int64 tensor with 1-indexed detection classes. 979 | fields.DetectionResultFields.detection_scores - [batch_size, 980 | max_num_boxes] float32 tensor with detection scores. 981 | fields.DetectionResultFields.detection_masks - (optional) [batch_size, 982 | max_num_boxes, H, W] float32 tensor of binarized masks. 983 | fields.DetectionResultFields.detection_keypoints - (optional) 984 | [batch_size, max_num_boxes, num_keypoints, 2] float32 tensor with 985 | keypoints. 986 | 987 | Returns: 988 | A dictionary of image summary names to tuple of (value_op, update_op). The 989 | `update_op` is the same for all items in the dictionary, and is 990 | responsible for saving a single side-by-side image with detections and 991 | groundtruth. Each `value_op` holds the tf.summary.image string for a given 992 | image.
993 | """ 994 | if self._max_examples_to_draw == 0: 995 | return {} 996 | images = self.images_from_evaluation_dict(eval_dict) 997 | 998 | def get_images(): 999 | """Returns a list of images, padded to self._max_images_to_draw.""" 1000 | images = self._images 1001 | while len(images) < self._max_examples_to_draw: 1002 | images.append(np.array(0, dtype=np.uint8)) 1003 | self.clear() 1004 | return images 1005 | 1006 | def image_summary_or_default_string(summary_name, image): 1007 | """Returns image summaries for non-padded elements.""" 1008 | return tf.cond( 1009 | tf.equal(tf.size(tf.shape(image)), 4), 1010 | lambda: tf.summary.image(summary_name, image), 1011 | lambda: tf.constant('')) 1012 | 1013 | if tf.executing_eagerly(): 1014 | update_op = self.add_images([[images[0]]]) 1015 | image_tensors = get_images() 1016 | else: 1017 | update_op = tf.py_func(self.add_images, [[images[0]]], []) 1018 | image_tensors = tf.py_func( 1019 | get_images, [], [tf.uint8] * self._max_examples_to_draw) 1020 | eval_metric_ops = {} 1021 | for i, image in enumerate(image_tensors): 1022 | summary_name = self._summary_name_prefix + '/' + str(i) 1023 | value_op = image_summary_or_default_string(summary_name, image) 1024 | eval_metric_ops[summary_name] = (value_op, update_op) 1025 | return eval_metric_ops 1026 | 1027 | @abc.abstractmethod 1028 | def images_from_evaluation_dict(self, eval_dict): 1029 | """Converts evaluation dictionary into a list of image tensors. 1030 | 1031 | To be overridden by implementations. 1032 | 1033 | Args: 1034 | eval_dict: A dictionary with all the necessary information for producing 1035 | visualizations. 1036 | 1037 | Returns: 1038 | A list of [1, H, W, C] uint8 tensors. 1039 | """ 1040 | raise NotImplementedError 1041 | 1042 | 1043 | class VisualizeSingleFrameDetections(EvalMetricOpsVisualization): 1044 | """Class responsible for single-frame object detection visualizations.""" 1045 | 1046 | def __init__(self, 1047 | category_index, 1048 | max_examples_to_draw=5, 1049 | max_boxes_to_draw=20, 1050 | min_score_thresh=0.2, 1051 | use_normalized_coordinates=True, 1052 | summary_name_prefix='Detections_Left_Groundtruth_Right'): 1053 | super(VisualizeSingleFrameDetections, self).__init__( 1054 | category_index=category_index, 1055 | max_examples_to_draw=max_examples_to_draw, 1056 | max_boxes_to_draw=max_boxes_to_draw, 1057 | min_score_thresh=min_score_thresh, 1058 | use_normalized_coordinates=use_normalized_coordinates, 1059 | summary_name_prefix=summary_name_prefix) 1060 | 1061 | def images_from_evaluation_dict(self, eval_dict): 1062 | return draw_side_by_side_evaluation_image( 1063 | eval_dict, self._category_index, self._max_boxes_to_draw, 1064 | self._min_score_thresh, self._use_normalized_coordinates) 1065 | --------------------------------------------------------------------------------
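
Usage note: visualization_utils.py is the piece that turns raw detection output into annotated frames. The sketch below is a minimal, hypothetical example of driving visualize_boxes_and_labels_on_image_array with a category index built via the bundled label_map_util. The label map filename, max_num_classes value, and the dummy box/class/score arrays are placeholders for illustration, not values defined in this repository; it also assumes qrio/tfutils is importable as tfutils and that the object_detection dependencies these utilities import are installed.

import numpy as np
from PIL import Image

from tfutils import label_map_util
from tfutils import visualization_utils as vis_util

# Build a category index from a label map (the path is a placeholder).
label_map = label_map_util.load_labelmap('label_map.pbtxt')
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=90, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# In practice image_np, boxes, classes and scores come from the detector:
#   image_np: uint8 array of shape (H, W, 3)
#   boxes:    float array [N, 4] of normalized (ymin, xmin, ymax, xmax)
#   classes:  int array [N] of 1-indexed label ids
#   scores:   float array [N]
# The values below are dummies so the sketch is self-contained.
image_np = np.array(Image.open('qrio/images/dogbot/dog-1.jpg').convert('RGB'))
boxes = np.array([[0.1, 0.1, 0.9, 0.9]])
classes = np.array([18], dtype=np.int32)   # e.g. 'dog' in a COCO-style label map
scores = np.array([0.92])

# Draws boxes, labels and scores onto image_np in place (and returns it).
vis_util.visualize_boxes_and_labels_on_image_array(
    image_np,
    boxes,
    classes,
    scores,
    category_index,
    use_normalized_coordinates=True,
    min_score_thresh=0.5,
    line_thickness=4)

Image.fromarray(image_np).save('detections.jpg')

Detections scoring below min_score_thresh are skipped, and box colours are picked per class id from STANDARD_COLORS, so different classes in the same frame get visibly different colours.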