@comment{Master thesis record (see the type field) for the citation key Harich2016.
  NOTE(review): no school field is recorded, although thesis entry types
  require one; add it once the awarding institution has been confirmed.
  NOTE(review): pages holds a single number, presumably the total page
  count of the thesis rather than a cited page; verify before relying on it.}
@mastersthesis{Harich2016,
  author   = {Harich, Nicolai},
  title    = {Fully Convolutional Networks for Semantic Segmentation from {RGB-D} images},
  type     = {Master Thesis},
  year     = {2016},
  pages    = {99},
  url      = {https://nbn-resolving.org/urn:nbn:de:bsz:900-opus4-48807},
  language = {en},
  abstract = {In recent years new trends such as industry 4.0 boosted the research and development in the field of autonomous systems and robotics. Robots collaborate and even take over complete tasks of humans. But the high degree of automation requires high reliability even in complex and changing environments. Those challenging conditions make it hard to rely on static models of the real world. In addition to adaptable maps, mobile robots require a local and current understanding of the scene. The Bosch Start-Up Company is developing robots for intra-logistic systems, which could highly benefit from such a detailed scene understanding. The aim of this work is to research and develop such a system for warehouse environments. While the possible field of application is in general very broad, this work will focus on the detection and localization of warehouse specific objects such as palettes. In order to provide a meaningful perception of the surrounding a RGB-D camera is used. A pre-trained convolutional network extracts scene understanding in the form of pixelwise class labels. As this convolutional network is the core of the application, this work focuses on different network set-ups and learning strategies. One difficulty was the lack of annotated training data. Since the creation of densely labeled images is a very time consuming process it was important to elaborate on good alternatives. One interesting finding was that it's possible to transfer learning to a high extent from similar models pre-trained on thousands of RGB-images. This is done by selective interventions on the net parameters. By ensuring a good initialization it's possible to train towards a well performing model within few iterations.
In this way it's possible to train even branched nets at once. This can also be achieved by including certain normalization steps. Another important aspect was to find a suitable way to incorporate depth-information. How to fuse depth into the existing model? By providing the height over ground as an additional feature the segmentation accuracy was further improved while keeping the extra computational costs low. Finally the segmentation maps are refined by a conditional random field. The joint training of both parts results in accurate object segmentations comparable to recently published state-of-the-art models.},
}