In this paper a word spotting approach to index archival image documents is presented. Indices are constructed from keyword images. The spotting strategy is formulated on an indexing-by-shape basis. The well known shape context descriptor is used to compute word image signatures from the skeleton points. Afterwards, codewords are extracted from thresholded shape contexts. It is a simpler and more compact representation based on bit vectors. Document images are roughly segmented into words and a lookup table is constructed. Each word subimage is taken as a bin. Keyword images are spotted into documents by a voting strategy consisting in indexing into the lookup table by codewords, and voting into the corresponding bins. The approach is illustrated by a real application scenario consisting of documents from a digital archive of the Spanish Civil War.
%0 Conference Paper
%1 4378733
%A Llados, J.
%A Sanchez, G.
%B Ninth International Conference on Document Analysis and Recognition (ICDAR 2007)
%D 2007
%K documents historicaldocuments indexing
%P 362-366
%R 10.1109/ICDAR.2007.4378733
%T Indexing Historical Documents by Word Shape Signatures
%U http://ieeexplore.ieee.org/abstract/document/4378733
%V 1
%X In this paper a word spotting approach to index archival image documents is presented. Indices are constructed from keyword images. The spotting strategy is formulated on an indexing-by-shape basis. The well known shape context descriptor is used to compute word image signatures from the skeleton points. Afterwards, codewords are extracted from thresholded shape contexts. It is a simpler and more compact representation based on bit vectors. Document images are roughly segmented into words and a lookup table is constructed. Each word subimage is taken as a bin. Keyword images are spotted into documents by a voting strategy consisting in indexing into the lookup table by codewords, and voting into the corresponding bins. The approach is illustrated by a real application scenario consisting of documents from a digital archive of the Spanish Civil War.
@comment{Conference paper entry. The cite key is the numeric document id that also appears at the end of the doi and url fields. The fields added-at, biburl, interhash, intrahash and timestamp are non-standard bookkeeping fields (biburl points at bibsonomy.org) and are ignored by standard styles.}
@inproceedings{4378733,
  author    = {Llados, J. and Sanchez, G.},
  title     = {Indexing Historical Documents by Word Shape Signatures},
  booktitle = {Ninth International Conference on Document Analysis and Recognition ({ICDAR} 2007)},
  volume    = {1},
  pages     = {362--366},
  month     = sep,
  year      = {2007},
  issn      = {2379-2140},
  doi       = {10.1109/ICDAR.2007.4378733},
  url       = {http://ieeexplore.ieee.org/abstract/document/4378733},
  keywords  = {documents historicaldocuments indexing},
  abstract  = {In this paper a word spotting approach to index archival image documents is presented. Indices are constructed from keyword images. The spotting strategy is formulated on an indexing-by-shape basis. The well known shape context descriptor is used to compute word image signatures from the skeleton points. Afterwards, codewords are extracted from thresholded shape contexts. It is a simpler and more compact representation based on bit vectors. Document images are roughly segmented into words and a lookup table is constructed. Each word subimage is taken as a bin. Keyword images are spotted into documents by a voting strategy consisting in indexing into the lookup table by codewords, and voting into the corresponding bins. The approach is illustrated by a real application scenario consisting of documents from a digital archive of the Spanish Civil War.},
  added-at  = {2022-04-06T23:04:49.000+0200},
  timestamp = {2022-04-06T23:04:49.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/2161aa30dc76c4a9f5f79f6d75b3ef7ea/petarkonig},
  interhash = {806e3791a5efe3f0112b2304097cafe1},
  intrahash = {161aa30dc76c4a9f5f79f6d75b3ef7ea},
}