The documentation URL you've looked at is certainly a good starting point.
If you've created a local collection, and it's updating successfully (the collection update logs could confirm this), the issue is probably with an indexing and/or query processing configuration.
<rdf:RDF xmlns:cc="http://web.resource.org/cc/"
         xmlns:dcam="http://purl.org/dc/dcam/"
         xmlns:dcterms="http://purl.org/dc/terms/"
         xmlns:marcrel="http://id.loc.gov/vocabulary/relators"
         xmlns:pgterms="http://www.gutenberg.org/2009/pgterms/"
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xml:base="http://www.gutenberg.org/">
  <!--
    Example catalogue record for ebook 1342 ("Pride and Prejudice").
    The nested rdf:Description/rdf:value content of dcterms:subject,
    dcterms:type and dcterms:format is omitted from this excerpt; those
    elements are kept, and explicitly closed, so that the record remains
    well-formed XML.
  -->
  <cc:Work rdf:about="feeds/catalog.rdf">
    <cc:license rdf:resource="http://www.gnu.org/licenses/gpl.html" />
  </cc:Work>

  <!-- The ebook itself: creator, available formats and descriptive metadata. -->
  <pgterms:ebook rdf:about="ebooks/1342">
    <dcterms:creator rdf:resource="2009/agents/68" />
    <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/1342.epub.noimages" />
    <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/1342.kindle.noimages" />
    <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/1342.plucker" />
    <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/1342.qioo" />
    <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/ebooks/1342.txt.utf-8" />
    <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/files/1342/1342-h.zip" />
    <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/files/1342/1342-h/1342-h.htm" />
    <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/files/1342/1342-pdf.pdf" />
    <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/files/1342/1342-pdf.zip" />
    <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/files/1342/1342.txt" />
    <dcterms:hasFormat rdf:resource="http://www.gutenberg.org/files/1342/1342.zip" />
    <dcterms:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">1998-06-01</dcterms:issued>
    <dcterms:language rdf:datatype="http://purl.org/dc/terms/RFC4646">en</dcterms:language>
    <dcterms:license rdf:resource="license" />
    <dcterms:publisher>Project Gutenberg</dcterms:publisher>
    <dcterms:rights>Public domain in the USA.</dcterms:rights>
    <dcterms:subject><!-- rdf:Description/rdf:value omitted in this excerpt --></dcterms:subject>
    <dcterms:subject><!-- rdf:Description/rdf:value omitted in this excerpt --></dcterms:subject>
    <dcterms:title>Pride and Prejudice</dcterms:title>
    <dcterms:type><!-- rdf:Description/rdf:value omitted in this excerpt --></dcterms:type>
  </pgterms:ebook>

  <!-- The agent referenced by dcterms:creator above. -->
  <pgterms:agent rdf:about="2009/agents/68">
    <pgterms:birthdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1775</pgterms:birthdate>
    <pgterms:deathdate rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1817</pgterms:deathdate>
    <pgterms:name>Austen, Jane</pgterms:name>
    <pgterms:webpage rdf:resource="http://en.wikipedia.org/wiki/Jane_Austen" />
  </pgterms:agent>
  <rdf:Description rdf:about="http://en.wikipedia.org/wiki/Jane_Austen">
    <dcterms:description>Wikipedia</dcterms:description>
  </rdf:Description>

  <!-- One pgterms:file per downloadable format listed via dcterms:hasFormat. -->
  <pgterms:file rdf:about="http://www.gutenberg.org/files/1342/1342-h/1342-h.htm">
    <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">821974</dcterms:extent>
    <dcterms:format><!-- rdf:Description/rdf:value omitted in this excerpt --></dcterms:format>
    <dcterms:isFormatOf rdf:resource="ebooks/1342" />
    <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-08-21T06:59:48</dcterms:modified>
  </pgterms:file>
  <pgterms:file rdf:about="http://www.gutenberg.org/files/1342/1342-h.zip">
    <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">270317</dcterms:extent>
    <dcterms:format><!-- rdf:Description/rdf:value omitted in this excerpt --></dcterms:format>
    <dcterms:isFormatOf rdf:resource="ebooks/1342" />
    <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-08-21T07:00:26</dcterms:modified>
  </pgterms:file>
  <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/1342.epub.noimages">
    <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">285830</dcterms:extent>
    <dcterms:format><!-- rdf:Description/rdf:value omitted in this excerpt --></dcterms:format>
    <dcterms:isFormatOf rdf:resource="ebooks/1342" />
    <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-08-21T23:27:58.417724</dcterms:modified>
  </pgterms:file>
  <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/1342.kindle.noimages">
    <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1198390</dcterms:extent>
    <dcterms:format><!-- rdf:Description/rdf:value omitted in this excerpt --></dcterms:format>
    <dcterms:isFormatOf rdf:resource="ebooks/1342" />
    <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-08-21T23:28:01.943507</dcterms:modified>
  </pgterms:file>
  <pgterms:file rdf:about="http://www.gutenberg.org/files/1342/1342-pdf.pdf">
    <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1570692</dcterms:extent>
    <dcterms:format><!-- rdf:Description/rdf:value omitted in this excerpt --></dcterms:format>
    <dcterms:isFormatOf rdf:resource="ebooks/1342" />
    <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2011-11-28T13:14:32</dcterms:modified>
  </pgterms:file>
  <pgterms:file rdf:about="http://www.gutenberg.org/files/1342/1342-pdf.zip">
    <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1232905</dcterms:extent>
    <dcterms:format><!-- rdf:Description/rdf:value omitted in this excerpt --></dcterms:format>
    <dcterms:isFormatOf rdf:resource="ebooks/1342" />
    <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2011-11-28T13:19:30</dcterms:modified>
  </pgterms:file>
  <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/1342.plucker">
    <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">422108</dcterms:extent>
    <dcterms:format><!-- rdf:Description/rdf:value omitted in this excerpt --></dcterms:format>
    <dcterms:isFormatOf rdf:resource="ebooks/1342" />
    <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-08-21T23:28:05.568284</dcterms:modified>
  </pgterms:file>
  <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/1342.qioo">
    <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">321760</dcterms:extent>
    <dcterms:format><!-- rdf:Description/rdf:value omitted in this excerpt --></dcterms:format>
    <dcterms:isFormatOf rdf:resource="ebooks/1342" />
    <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-08-21T23:27:57.496782</dcterms:modified>
  </pgterms:file>
  <pgterms:file rdf:about="http://www.gutenberg.org/ebooks/1342.txt.utf-8">
    <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">717569</dcterms:extent>
    <dcterms:format><!-- rdf:Description/rdf:value omitted in this excerpt --></dcterms:format>
    <dcterms:isFormatOf rdf:resource="ebooks/1342" />
    <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-08-21T23:27:57.273790</dcterms:modified>
  </pgterms:file>
  <pgterms:file rdf:about="http://www.gutenberg.org/files/1342/1342.txt">
    <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">717597</dcterms:extent>
    <dcterms:format><!-- rdf:Description/rdf:value omitted in this excerpt --></dcterms:format>
    <dcterms:isFormatOf rdf:resource="ebooks/1342" />
    <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-08-21T06:58:42</dcterms:modified>
  </pgterms:file>
  <pgterms:file rdf:about="http://www.gutenberg.org/files/1342/1342.zip">
    <dcterms:extent rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">259663</dcterms:extent>
    <dcterms:format><!-- rdf:Description/rdf:value omitted in this excerpt --></dcterms:format>
    <dcterms:isFormatOf rdf:resource="ebooks/1342" />
    <dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2013-08-21T07:00:26</dcterms:modified>
  </pgterms:file>
</rdf:RDF>
For portability purposes, I've downloaded the zipped dump and extracted it to my collection's configuration folder ($SEARCH_HOME/conf/$COLLECTION_NAME/source).
By default, Funnelback doesn't index its own installation folder for local collections - I'll need to disable this behaviour for this collection via collection.cfg (Administer > Edit Collection Settings > Indexer):
-check_url_exclusion=off -ifb
Looking at the example XML record from my collection, I've determined that there are only a handful of fields that I need indexed. Updates to xml.cfg look like:
PADRE XML Mapping Version: 2
t,1,,//dcterms:title
s,1,,//dcterms:subject/rdf:Description/rdf:value
e,0,,//dcterms:type/rdf:Description/rdf:value
d,0,,//dcterms:issued
l,0,,//dcterms:language
r,0,,//dcterms:rights
a,1,,//pgterms:agent/pgterms:name
f,0,,//pgterms:file/dcterms:format/rdf:Description/rdf:value
o,0,,//dcterms:isFormatOf@rdf:resource
g,0,,//dcterms:hasFormat@rdf:resource
I,0,,//pgterms:ebook/pgterms:marc901
Any updates to the xml.cfg file will require a reindexing before they take effect (Update > Start Advanced Update > Reindex Live View).
A few more modifications to my collection.cfg file are required to ensure that these fields are returned in the data model when I conduct a query. Further detail is available from the Custom Summaries documentation.
My final collection.cfg file looks like:
#
# Filename: /opt/funnelback/conf/project-gutenberg/collection.cfg
# Last Update: Thu Apr 24 14:17:46 2014
#
# Settings for the local "project-gutenberg" collection built over the
# extracted catalogue dump.
#
click_tracking.restrict_redirects_to_existing_urls_and_fps=true
collection=project-gutenberg
collection_type=local
# The catalogue dump was extracted into the "source" folder inside this
# collection's configuration folder.
data_root=$SEARCH_HOME/conf/$COLLECTION_NAME/source
# Local collections skip Funnelback's own installation folder by default;
# these indexer options turn that behaviour off so the files under data_root
# are indexed.
indexer_options=-check_url_exclusion=off -ifb
# Options needed for the mapped metadata fields to be returned in the data
# model at query time.
# NOTE(review): -SF looks like the list of metadata fields to return - confirm
# against the Custom Summaries documentation.
query_processor_options=-stem=2 -SM=meta -SF=acdfgiotI -countgbits=all
service_name=Project Gutenberg
ui_cache_link=/s/cache.html
Looking at the JSON output for my search query for 'pride and prejudice', I can see that all indexed fields are now coming back in my results' summaries:
/s/search.json?collection=project-gutenberg&query=pride%20and%20prejudice
...
results: [
{
rank: 1,
score: 1000,
title: "Pride and Prejudice",
collection: "project-gutenberg",
component: 0,
collapsed: null,
liveUrl: "file:///opt/funnelback/conf/project-gutenberg/source/cache/epub/42671/pg42671.rdf",
summary: null,
cacheUrl: "/s/cache.html?collection=project-gutenberg&doc=cache/epub/42671/pg42671.rdf&off=0&len=-1&url=file%3A%2F%2F%2Fopt%2Ffunnelback%2Fconf%2Fproject-gutenberg%2Fsource%2Fcache%2Fepub%2F42671%2Fpg42671.rdf&profile=_default_preview",
date: 1368021600000,
fileSize: 11840,
fileType: "xml",
tier: 1,
docNum: 2966,
exploreLink: null,
kmFromOrigin: null,
metaData: {
f: "text/html|application/zip|text/html|application/epub+zip|application/epub+zip|application/x-mobipocket-ebook|application/x-mobipocket-ebook|application/prs.plucker|application/x-qioo-ebook|text/plain|text/plain;",
g: "http://www.gutenberg.org/ebooks/42671.epub.images|http://www.gutenberg.org/ebooks/42671.epub.noimages|http://www.gutenberg.org/ebooks/42671.kindle.images|http://www.gutenberg.org/ebooks/42671.kindle.noimages|http://www.gutenberg.org/ebooks/42671.pluc",
d: "2013-05-09",
t: "Pride and Prejudice",
a: "Austen, Jane|Chapman, R. W. (Robert William)",
o: "ebooks/42671|ebooks/42671|ebooks/42671|ebooks/42671|ebooks/42671|ebooks/42671|ebooks/42671|ebooks/42671|ebooks/42671|ebooks/42671|ebooks/42671|ebooks/42671|ebooks/42671"
},
tags: [ ],
quickLinks: null,
displayUrl: "/opt/funnelback/conf/project-gutenberg/source/cache/epub/42671/pg42671.rdf",
clickTrackingUrl: "/s/redirect?rank=1&collection=project-gutenberg&url=file%3A%2F%2F%2Fopt%2Ffunnelback%2Fconf%2Fproject-gutenberg%2Fsource%2Fcache%2Fepub%2F42671%2Fpg42671.rdf&index_url=file%3A%2F%2F%2Fopt%2Ffunnelback%2Fconf%2Fproject-gutenberg%2Fsource%2Fcache%2Fepub%2F42671%2Fpg42671.rdf&auth=hTZZnzKI8emfGclXIjPyxg&query=pride+and+prejudice&profile=_default_preview",
explain: null,
indexUrl: "file:///opt/funnelback/conf/project-gutenberg/source/cache/epub/42671/pg42671.rdf",
customData: { }
},
...
Finally, some faceting by Author, Subject, etc. would be nice. The following ended up in faceted_navigation.cfg:
<!--
  Faceted navigation definition: one Facet per category shown to the user.
  Each Facet holds a display label followed by the single-letter metadata
  field it is filled from. The fields named in qpoptions (e, l, r, a, f, s
  and the date field d) are the same ones the facets below refer to.
-->
<Facets qpoptions=" -rmcf=elrafs -count_dates=d">
  <Data></Data>

  <!-- Metadata field "a" -->
  <Facet>
    <Data>Author</Data>
    <MetadataFieldFill>
      <Data>a</Data>
    </MetadataFieldFill>
  </Facet>

  <!-- Metadata field "s" -->
  <Facet>
    <Data>Subject</Data>
    <MetadataFieldFill>
      <Data>s</Data>
    </MetadataFieldFill>
  </Facet>

  <!-- Date field "d"; the only facet using DateFieldFill -->
  <Facet>
    <Data>Release Date</Data>
    <DateFieldFill>
      <Data>d</Data>
    </DateFieldFill>
  </Facet>

  <!-- Metadata field "l" -->
  <Facet>
    <Data>Language</Data>
    <MetadataFieldFill>
      <Data>l</Data>
    </MetadataFieldFill>
  </Facet>

  <!-- Metadata field "f" -->
  <Facet>
    <Data>Format</Data>
    <MetadataFieldFill>
      <Data>f</Data>
    </MetadataFieldFill>
  </Facet>

  <!-- Metadata field "r" -->
  <Facet>
    <Data>Licence</Data>
    <MetadataFieldFill>
      <Data>r</Data>
    </MetadataFieldFill>
  </Facet>

  <!-- Metadata field "e" -->
  <Facet>
    <Data>Type</Data>
    <MetadataFieldFill>
      <Data>e</Data>
    </MetadataFieldFill>
  </Facet>
</Facets>