Build help index database using a combination of perl & C. Use

C to create and fill the database, to ensure the same db lib is
used to both create and read the db.


git-svn-id: svn+ssh://svn.gnucash.org/repo/gnucash/trunk@3338 57a11ea4-9604-0410-9ed3-97b8803252fd
This commit is contained in:
Dave Peticolas 2000-12-22 03:47:40 +00:00
parent ca7c372771
commit 1e3fd8b171
8 changed files with 215 additions and 41 deletions

View File

@ -1,5 +1,5 @@
SUBDIRS = macros debian doc intl lib src po rpm accounts
SUBDIRS = macros debian doc-tools doc intl lib src po rpm accounts
docdir = ${GNC_DOC_INSTALL_DIR}

View File

@ -1,38 +0,0 @@
#! /usr/bin/perl
# Build the help search index.
#
# Usage: build-help-index.pl DB-FILE HELP-FILE...
#
# Every word of three or more characters found in the given HTML help
# files becomes a key in DB-FILE; its value is the list of files that
# contain the word, one file name per line.
use strict;
use DB_File;

my $db_file = shift @ARGV;
die "Usage: $0 db-file help-file...\n" unless defined $db_file;

my %contents;
tie %contents, 'DB_File', $db_file
    or die "Cannot open database $db_file: $!\n";

foreach my $file (@ARGV) {
    # Slurp the whole help file through a lexical handle that is
    # closed again as soon as the text has been read.
    open(my $help, '<', $file) or die "Bad help file $file specified.\n";
    my $data = do { local $/; <$help> };
    close($help);
    $data = '' unless defined $data;

    $data =~ s/<[^>]*>/ /gs;              # get rid of HTML tags
    $data =~ tr/\",();&<>!$*/ /;          # get rid of extra punct
    $data =~ tr/A-Z/a-z/;                 # lowercase everything
    $data =~ tr/ \012\011/ /s;            # crunch whitespace
    $data =~ s/[\.,\'\":\;\+|-]+ / /gs;   # get rid of terminal punct
    $data =~ s/ [.,\'\":;+|-]+/ /gs;      # get rid of initial punct

    # Collect each distinct word once per file.  1- and 2-letter words
    # are dropped here, word by word: a regex of the form / x / cannot
    # remove two short words in a row, because both matches would need
    # the single space between them.
    my %filewords;
    foreach my $w (split(' ', $data)) {
        $filewords{$w} = 1 if length($w) > 2;
    }

    # Append this file to the list stored under each of its words.
    foreach my $w (keys %filewords) {
        $contents{$w} .= "$file\012";
    }
}

untie %contents;

View File

@ -398,6 +398,7 @@ AC_OUTPUT(
doc/sgml/Makefile
doc/sgml/C/Makefile
doc/sgml/C/image/Makefile
doc-tools/Makefile
intl/Makefile
lib/Makefile
macros/Makefile

4
doc-tools/.cvsignore Normal file
View File

@ -0,0 +1,4 @@
Makefile
Makefile.in
build-help-index
dbadd

27
doc-tools/Makefile.am Normal file
View File

@ -0,0 +1,27 @@
## dbadd is the C helper that writes key/value pairs into the help
## index database; build-help-index is the perl driver that calls it.
noinst_PROGRAMS = dbadd

LDADD = \
  ${DB_LIBS}

dbadd_SOURCES = \
  dbadd.c

EXTRA_DIST = \
  .cvsignore \
  build-help-index.in

noinst_DATA = build-help-index

## We borrow guile's convention and use @-...-@ as the substitution
## brackets here, instead of the usual @...@.  This prevents autoconf
## from substituting the values directly into the left-hand sides of
## the sed substitutions.
##
## The template is read from $(srcdir) so that the rule also works when
## the build directory differs from the source directory.
build-help-index: build-help-index.in
	rm -f $@.tmp
	sed < $(srcdir)/$@.in > $@.tmp \
	    -e 's:@-PERL-@:${PERL}:g'
	chmod +x $@.tmp
	mv $@.tmp $@

## Plain '=' because CLEANFILES is not set anywhere earlier in this
## file; automake requires a variable to be defined before '+='.
CLEANFILES = build-help-index

View File

@ -0,0 +1,62 @@
#!@-PERL-@ -w
# -*- perl -*-
#
# Build the help search index.
#
# Usage: build-help-index DBADD DB-FILE HELP-FILE...
#
# Every word of three or more characters found in the given HTML help
# files is mapped to the list of files containing it (one file name per
# line).  The resulting key/value pairs are handed to the DBADD program,
# which stores them in DB-FILE.

use strict;

die "Usage: $0 dbadd-program db-file help-file...\n" if @ARGV < 2;

my $dbadd   = shift @ARGV;
my $db_file = shift @ARGV;

my %contents;

foreach my $file (@ARGV) {
    # Slurp the whole help file through a lexical handle that is
    # closed again as soon as the text has been read.
    open(my $help, '<', $file) or die "Bad help file $file specified.\n";
    my $data = do { local $/; <$help> };
    close($help);
    $data = '' unless defined $data;

    $data =~ s/<[^>]*>/ /gs;              # get rid of HTML tags
    $data =~ tr/\",();&<>!$*/ /;          # get rid of extra punct
    $data =~ tr/A-Z/a-z/;                 # lowercase everything
    $data =~ tr/ \012\011/ /s;            # crunch whitespace
    $data =~ s/[\.,\'\":\;\+|-]+ / /gs;   # get rid of terminal punct
    $data =~ s/ [.,\'\":;+|-]+/ /gs;      # get rid of initial punct

    # Collect each distinct word once per file.  1- and 2-letter words
    # are dropped here, word by word: a regex of the form / x / cannot
    # remove two short words in a row, because both matches would need
    # the single space between them.
    my %filewords;
    foreach my $w (split(' ', $data)) {
        $filewords{$w} = 1 if length($w) > 2;
    }

    # Append this file to the list kept for each of its words.
    foreach my $w (keys %filewords) {
        $contents{$w} .= "$file\012";
    }
}

# we don't store all the keys at once in case there
# are limits on the size of argv. But do more than
# one at a time for efficiency.
my @keys = keys %contents;
while (my @batch = splice(@keys, 0, 32)) {
    my @args = map { ($_, $contents{$_}) } @batch;

    # List-form system() bypasses the shell.  A non-zero status means
    # the pairs were not stored, so stop instead of leaving a partial
    # index behind unnoticed.
    system($dbadd, $db_file, @args) == 0
        or die "$dbadd failed while updating $db_file\n";
}

116
doc-tools/dbadd.c Normal file
View File

@ -0,0 +1,116 @@
/********************************************************************\
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License as *
* published by the Free Software Foundation; either version 2 of *
* the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License*
* along with this program; if not, contact: *
* *
* Free Software Foundation Voice: +1-617-542-5942 *
* 59 Temple Place - Suite 330 Fax: +1-617-542-2652 *
* Boston, MA 02111-1307, USA gnu@gnu.org *
* *
\********************************************************************/
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
/* needed for db.h with 'gcc -ansi -pedantic' */
#ifndef _BSD_SOURCE
# define _BSD_SOURCE 1
#endif
#ifdef PREFER_DB1
#ifdef HAVE_DB1_DB_H
# include <db1/db.h>
#else
# ifdef HAVE_DB_185_H
# include <db_185.h>
# else
# include <db.h>
# endif
#endif
#else
#ifdef HAVE_DB_185_H
# include <db_185.h>
#else
# ifdef HAVE_DB_H
# include <db.h>
# else
# include <db1/db.h>
# endif
#endif
#endif
/* Clear every byte of Dbt.  memset takes (pointer, fill byte, length);
 * with the last two arguments swapped it would write zero bytes and
 * leave the object untouched. */
#define ZERO(Dbt) memset (&(Dbt), 0, sizeof (Dbt))
/* Handle of the database being filled; main() opens and closes it. */
static DB *database;

/* Write a one-line usage summary for the program invoked as NAME to
 * stderr, then terminate with exit status 1.  Does not return. */
static void
usage (const char *name)
{
  static const char format[] =
    "Usage: %s database key1 value1 key2 value2 ...\n";

  fprintf (stderr, format, name);
  exit (1);
}
/*
 * Store key/value pairs given on the command line in a hash database.
 *
 * Invocation: dbadd database key1 value1 key2 value2 ...
 *
 * The database is created if it does not exist.  Keys and values are
 * stored with their strlen() as size, i.e. without the terminating
 * NUL.  Returns 0 on success; prints a message to stderr and exits
 * with status 1 on a usage error or when opening, writing or closing
 * the database fails.
 */
int
main (int argc, char *argv[])
{
  const char *db_name;
  int i;

  /* The database name is mandatory, and every key needs a value, so
   * the argument count (program name included) must be even. */
  if (argc < 2 || argc % 2 != 0)
    usage (argv[0]);

  db_name = argv[1];

  database = dbopen (db_name, O_CREAT | O_RDWR, 0644, DB_HASH, NULL);
  if (!database)
  {
    fprintf (stderr, "Error opening database %s: %s\n",
             db_name, strerror (errno));
    exit (1);
  }

  for (i = 2; i < argc; i += 2)
  {
    DBT key;
    DBT value;

    ZERO (key);
    ZERO (value);

    key.data = argv[i];
    key.size = strlen (argv[i]);

    value.data = argv[i + 1];
    value.size = strlen (argv[i + 1]);

    if (database->put (database, &key, &value, 0))
    {
      /* Report first: the close below may overwrite errno. */
      fprintf (stderr, "Error writing data to %s: %s\n",
               db_name, strerror (errno));
      database->close (database);
      exit (1);
    }
  }

  /* A failing close means the database may be incomplete on disk, so
   * it must not be reported as success. */
  if (database->close (database))
  {
    fprintf (stderr, "Error closing database %s: %s\n",
             db_name, strerror (errno));
    exit (1);
  }

  return 0;
}

View File

@ -113,8 +113,10 @@ gnucash/index.html: $(GNUCASH_SGML_FILES) $(GNUCASH_HTML_FILES)
&& cp $(srcdir)/image/*.png gnucash/image || exit 1)
gnucash/help-search-index.db: $(GNUCASH_SGML_FILES) $(GNUCASH_HTML_FILES)
-(chmod a+x $(top_srcdir)/build-help-index.pl && cd gnucash \
&& ../$(top_srcdir)/build-help-index.pl help-search-index.db *.html)
-(cd gnucash && \
rm -f help-search-index.db && \
../$(top_srcdir)/doc-tools/build-help-index \
../$(top_srcdir)/doc-tools/dbadd help-search-index.db *.html)
dist-hook:
mkdir $(distdir)/gnucash