Build help index database using a combination of perl & C.

Use C to create and fill the database, to ensure the same db lib is used to both create and read the db. git-svn-id: svn+ssh://svn.gnucash.org/repo/gnucash/trunk@3338 57a11ea4-9604-0410-9ed3-97b8803252fd
2025-02-25 18:55:30 -06:00 · 2000-12-22 03:47:40 +00:00 · 2000-12-22 03:47:40 +00:00 · 1e3fd8b171
commit 1e3fd8b171
parent ca7c372771
8 changed files with 215 additions and 41 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -1,5 +1,5 @@

-SUBDIRS = macros debian doc intl lib src po rpm accounts
+SUBDIRS = macros debian doc-tools doc intl lib src po rpm accounts

 docdir = ${GNC_DOC_INSTALL_DIR}

--- a/build-help-index.pl
+++ b/build-help-index.pl
@ -1,38 +0,0 @@
-#! /usr/bin/perl
-
-use DB_File;
-
-tie %contents, 'DB_File', $ARGV[0];
-
-shift @ARGV;
-
-foreach my $file (@ARGV) {
-    my %filewords;
-
-    open(HELPFILE, $file) or die "Bad help file $file specified.\n";
-    my $size = (stat($file))[7];
-    my $data;
-    read HELPFILE,$data,$size;
-    $data =~ s/<[^>]*>/ /gs;          # get rid of HTML tags 
-    $data =~ tr/\",();&<>!$*/ /;     # get rid of extra punct
-    $data =~ tr/[A-Z]/[a-z]/;        # lowercase everything
-    $data =~ tr/ \012\011/ /s;       # crunch whitespace 
-    $data =~ s/[\.,\'\":\;\+|-]+ / /gs; # get rid of terminal punct 
-    $data =~ s/ [.,\'\":;+|-]+/ /gs;    # get rid of initial punct 
-    $data =~ s/ [^ ] / /gs;          # remove 1-letter words 
-    $data =~ s/ [^ ][^ ] / /gs;      # remove 2-letter words 
-    $data =~ tr/ \012\011/ /s;       # crunch whitespace again
-    my @words = split(' ', $data);
-    @words = sort(@words);
-    foreach my $w (@words) {
-	$filewords{$w} = ' ';
-    }
-    foreach my $w (keys(%filewords)) {
-	my $flist = $contents{$w};
-	$contents{$w} = "$flist$file\012";
-    }
-}
-
-untie %contents;
-
-
--- a/configure.in
+++ b/configure.in
@ -398,6 +398,7 @@ AC_OUTPUT(
          doc/sgml/Makefile
          doc/sgml/C/Makefile
          doc/sgml/C/image/Makefile
+          doc-tools/Makefile
          intl/Makefile
          lib/Makefile
          macros/Makefile
--- a/doc-tools/.cvsignore
+++ b/doc-tools/.cvsignore
@ -0,0 +1,4 @@
+Makefile
+Makefile.in
+build-help-index
+dbadd
--- a/doc-tools/Makefile.am
+++ b/doc-tools/Makefile.am
@ -0,0 +1,27 @@
+
+noinst_PROGRAMS = dbadd
+
+LDADD = \
+  ${DB_LIBS}
+
+dbadd_SOURCES = \
+  dbadd.c
+
+EXTRA_DIST = \
+  .cvsignore \
+  build-help-index.in
+
+noinst_DATA = build-help-index
+
+## We borrow guile's convention and use @-...-@ instead of the usual @...@
+## as substitution brackets, so that autoconf does not substitute values
+## into the left-hand sides of the sed expressions.  The template is read
+## from $(srcdir) so that builds outside the source tree can find it.
+build-help-index: build-help-index.in
+	rm -f $@.tmp
+	sed < $(srcdir)/$@.in > $@.tmp \
+            -e 's:@-PERL-@:${PERL}:g'
+	chmod +x $@.tmp
+	mv $@.tmp $@
+## Plain '=': automake rejects '+=' on a variable that was never set.
+CLEANFILES = build-help-index
--- a/doc-tools/build-help-index.in
+++ b/doc-tools/build-help-index.in
@ -0,0 +1,62 @@
+#!@-PERL-@ -w
+# -*- perl -*-
+#
+# Usage: build-help-index DBADD DB_FILE HELP_FILE...
+#
+# Reduces each HELP_FILE to a set of lowercase words, records which files
+# contain each word, and stores the word => file-list pairs in DB_FILE by
+# running the DBADD program once per batch of words.
+
+use strict;
+
+# The helper program and the database name are both required.
+die "Usage: $0 dbadd db-file help-file ...\n" if @ARGV < 2;
+
+my $dbadd   = shift @ARGV;
+my $db_file = shift @ARGV;
+
+# word => names of the files containing it, each followed by a newline
+my %contents;
+
+foreach my $file (@ARGV) {
+    # Three-argument open: characters in $file are never read as a mode.
+    open (my $help, '<', $file)
+        or die "Bad help file $file specified.\n";
+
+    # Slurp the whole file, then release the handle before the next one.
+    my $data = do { local $/; <$help> };
+    close ($help);
+
+    $data =~ s/<[^>]*>/ /gs;            # get rid of HTML tags
+    $data =~ tr/\",();&<>!$*/ /;        # get rid of extra punct
+    $data =~ tr/A-Z/a-z/;               # lowercase; tr has no [] classes
+    $data =~ tr/ \012\011/ /s;          # crunch whitespace
+    $data =~ s/[\.,\'\":\;\+|-]+ / /gs; # get rid of terminal punct
+    $data =~ s/ [.,\'\":;+|-]+/ /gs;    # get rid of initial punct
+
+    # Drop 1- and 2-letter words by length.  A space-delimited pattern
+    # such as s/ [^ ] / /g consumes the separating space and therefore
+    # misses every other word in a run of short words.  Using the words
+    # as hash keys also removes duplicates.
+    my %filewords;
+    @filewords{ grep { length ($_) > 2 } split (' ', $data) } = ();
+
+    foreach my $w (keys (%filewords)) {
+        # .= on a missing entry starts from the empty string
+        $contents{$w} .= "$file\012";
+    }
+}
+
+my @keys = keys (%contents);
+
+# we don't store all the keys at once in case there
+# are limits on the size of argv. But do more than
+# one at a time for efficiency.
+while (my @batch = splice (@keys, 0, 32)) {
+    my @args = map { ($_, $contents{$_}) } @batch;
+
+    # A failed dbadd run would otherwise leave the index incomplete
+    # without any indication.
+    system ($dbadd, $db_file, @args) == 0
+        or die "$dbadd failed while updating $db_file (status $?)\n";
+}
--- a/doc-tools/dbadd.c
+++ b/doc-tools/dbadd.c
@ -0,0 +1,116 @@
+/********************************************************************\
+ * This program is free software; you can redistribute it and/or    *
+ * modify it under the terms of the GNU General Public License as   *
+ * published by the Free Software Foundation; either version 2 of   *
+ * the License, or (at your option) any later version.              *
+ *                                                                  *
+ * This program is distributed in the hope that it will be useful,  *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of   *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    *
+ * GNU General Public License for more details.                     *
+ *                                                                  *
+ * You should have received a copy of the GNU General Public License*
+ * along with this program; if not, contact:                        *
+ *                                                                  *
+ * Free Software Foundation           Voice:  +1-617-542-5942       *
+ * 59 Temple Place - Suite 330        Fax:    +1-617-542-2652       *
+ * Boston, MA  02111-1307,  USA       gnu@gnu.org                   *
+ *                                                                  *
+\********************************************************************/
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/* needed for db.h with 'gcc -ansi -pedantic' */
+#ifndef _BSD_SOURCE
+#  define _BSD_SOURCE 1
+#endif
+
+#ifdef PREFER_DB1
+#ifdef HAVE_DB1_DB_H
+# include <db1/db.h>
+#else
+# ifdef HAVE_DB_185_H
+#  include <db_185.h>
+# else
+#  include <db.h>
+# endif
+#endif
+#else
+#ifdef HAVE_DB_185_H
+# include <db_185.h>
+#else
+# ifdef HAVE_DB_H
+#  include <db.h>
+# else
+#  include <db1/db.h>
+# endif
+#endif
+#endif
+
+
+#define ZERO(Dbt) memset (&(Dbt), 0, sizeof (DBT)) /* (dest, fill byte, length) */
+
+static DB *database;
+
+/* Write the usage line for program NAME to stderr and exit with status 1. */
+static void
+usage (const char *name)
+{
+  fprintf (stderr, "Usage: %s database key1 value1 key2 value2 ...\n", name);
+  exit(1);
+}
+
+/* Store the key/value argument pairs in the hash database argv[1],
+ * creating it if needed; any failure is reported and exits with 1. */
+int
+main (int argc, char *argv[])
+{
+  const char *db_name;
+  int i;
+
+  /* program name + database + whole pairs: argc is even and >= 2 */
+  if (argc < 2 || argc % 2 != 0)
+    usage (argv[0]);
+
+  db_name = argv[1];
+  database = dbopen (db_name, O_CREAT | O_RDWR, 0644, DB_HASH, NULL);
+  if (!database)
+  {
+    fprintf (stderr, "Error opening database %s: %s\n",
+             db_name, strerror (errno));
+    exit (1);
+  }
+
+  for (i = 2; i < argc; i += 2)
+  {
+    DBT key, value;
+
+    ZERO (key);
+    ZERO (value);
+    key.data = argv[i];
+    key.size = strlen (key.data);
+    value.data = argv[i + 1];
+    value.size = strlen (value.data);
+
+    if (database->put (database, &key, &value, 0))
+    {
+      fprintf (stderr, "Error writing data: %s\n", strerror (errno));
+      exit (1);
+    }
+  }
+
+  /* dbopen(3): close flushes cached pairs, so a failure here loses data */
+  if (database->close (database))
+  {
+    fprintf (stderr, "Error closing %s: %s\n", db_name, strerror (errno));
+    exit (1);
+  }
+  return 0;
+}
--- a/doc/sgml/C/Makefile.am
+++ b/doc/sgml/C/Makefile.am
@ -113,8 +113,10 @@ gnucash/index.html: $(GNUCASH_SGML_FILES) $(GNUCASH_HTML_FILES)
 	  && cp $(srcdir)/image/*.png gnucash/image || exit 1) 

 gnucash/help-search-index.db: $(GNUCASH_SGML_FILES) $(GNUCASH_HTML_FILES)
-	-(chmod a+x $(top_srcdir)/build-help-index.pl && cd gnucash \
-	  && ../$(top_srcdir)/build-help-index.pl help-search-index.db *.html)
+## build-help-index and dbadd are build products, so look in the build tree.
+	-(cd gnucash && rm -f help-search-index.db && \
+	  ../$(top_builddir)/doc-tools/build-help-index \
+            ../$(top_builddir)/doc-tools/dbadd help-search-index.db *.html)

 dist-hook:
 	mkdir $(distdir)/gnucash