
% NOTE(review): author names were normalised from what looks like a mangled
% export ("Shahbaz, Fahad", "Rao, Muhammad Anwer"); confirm against the
% publisher record for the DOI below. Citation key kept as-is for existing cites.
@article{ref1,
  author   = {Khan, Fahad Shahbaz and van de Weijer, Joost and Anwer, Rao Muhammad and Felsberg, Michael and Gatta, Carlo},
  title    = {Semantic Pyramids for Gender and Action Recognition},
  journal  = {{IEEE} Transactions on Image Processing},
  year     = {2014},
  volume   = {23},
  number   = {8},
  pages    = {3633--3645},
  issn     = {1057-7149},
  doi      = {10.1109/TIP.2014.2331759},
  url      = {https://doi.org/10.1109/TIP.2014.2331759},
  language = {en},
  abstract = {Person description is a challenging problem in computer vision. We investigate two major aspects of person description: gender and action recognition in still images. Most state-of-the-art approaches for gender and action recognition rely on the description of a single body part such as face or full-body. However, relying on a single body part is sub-optimal due to significant variations in scale, viewpoint and pose in real-world images. This paper proposes a semantic pyramid approach for pose normalization. Our approach is fully automatic and based on combining information from full-body, upper-body and face regions for gender and action recognition in still images. The proposed approach does not require any annotations for upper-body and face of a person. Instead, we rely on pre-trained state-of-the-art upper-body and face detectors to automatically extract semantic information of a person. Given multiple bounding boxes from each body part detector, we then propose a simple method to select the best candidate bounding box which is used for feature extraction. Finally, the extracted features from the full-body, upper-body and face regions are combined into a single representation for classification. To validate the proposed approach for gender recognition, experiments are performed on three large datasets namely: Human attribute, Head-Shoulder and Proxemics. For action recognition, we perform experiments on four datasets most used for benchmarking action recognition in still images: Sports, Willow, PASCAL VOC 2010 and Stanford-40. Our experiments clearly demonstrate that the proposed approach, despite its simplicity, outperforms state-of-the-art methods for gender and action recognition.},
}