mirror of
https://github.com/Gnucash/gnucash.git
synced 2025-02-25 18:55:30 -06:00
153 lines
5.2 KiB
Scheme
153 lines
5.2 KiB
Scheme
|
;;;; url.scm: URL manipulation tools.
|
|||
|
|
|||
|
(define-module (www url)
|
|||
|
:use-module (ice-9 regex))
|
|||
|
|
|||
|
;;;; Copyright (C) 1997 Free Software Foundation, Inc.
|
|||
|
;;;;
|
|||
|
;;;; This program is free software; you can redistribute it and/or modify
|
|||
|
;;;; it under the terms of the GNU General Public License as published by
|
|||
|
;;;; the Free Software Foundation; either version 2, or (at your option)
|
|||
|
;;;; any later version.
|
|||
|
;;;;
|
|||
|
;;;; This program is distributed in the hope that it will be useful,
|
|||
|
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|||
|
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|||
|
;;;; GNU General Public License for more details.
|
|||
|
;;;;
|
|||
|
;;;; You should have received a copy of the GNU General Public License
|
|||
|
;;;; along with this software; see the file COPYING. If not, write to
|
|||
|
;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
|
|||
|
;;;; Boston, MA 02111-1307 USA
|
|||
|
;;;;
|
|||
|
|
|||
|
;;;; TODO:
|
|||
|
;;;; * support `user:password@' strings where appropriate in URLs.
|
|||
|
;;;; * make URL parsing smarter. This is good for most TCP/IP-based
|
|||
|
;;;; URL schemes, but parsing is actually specific to each URL scheme.
|
|||
|
;;;; * fill out url:encode, include facilities for URL-scheme-specific
|
|||
|
;;;; encoding methods (e.g. a url-scheme-reserved-char-alist)
|
|||
|
|
|||
|
;; `url:scheme' is an unfortunate term, but it is the technical
|
|||
|
;; name for that portion of the URL according to RFC 1738. Sigh.
|
|||
|
|
|||
|
(define-public (url:scheme url) (vector-ref url 0))
|
|||
|
(define-public (url:address url) (vector-ref url 1))
|
|||
|
(define-public (url:unknown url) (vector-ref url 1))
|
|||
|
(define-public (url:user url) (vector-ref url 1))
|
|||
|
(define-public (url:host url) (vector-ref url 2))
|
|||
|
(define-public (url:port url) (vector-ref url 3))
|
|||
|
(define-public (url:path url) (vector-ref url 4))
|
|||
|
|
|||
|
(define-public (url:make scheme . args)
|
|||
|
(apply vector scheme args))
|
|||
|
(define-public (url:make-http host port path)
|
|||
|
(vector 'http #f host port path))
|
|||
|
(define-public (url:make-ftp user host port path)
|
|||
|
(vector 'ftp user host port path))
|
|||
|
(define-public (url:make-mailto address)
|
|||
|
(vector 'mailto address))
|
|||
|
|
|||
|
(define http-regexp (make-regexp "^http://([^:/]+)(:([0-9]+))?(/(.*))?$"))
|
|||
|
(define ftp-regexp
|
|||
|
(make-regexp "^ftp://(([^@:/]+)@)?([^:/]+)(:([0-9]+))?(/(.*))?$"))
|
|||
|
(define mailto-regexp (make-regexp "^mailto:(.*)$"))
|
|||
|
|
|||
|
(define-public (url:parse url)
|
|||
|
(cond
|
|||
|
((regexp-exec http-regexp url)
|
|||
|
=> (lambda (m)
|
|||
|
(url:make-http (match:substring m 1)
|
|||
|
(cond ((match:substring m 3) => string->number)
|
|||
|
(else #f))
|
|||
|
(match:substring m 5))))
|
|||
|
|
|||
|
((regexp-exec ftp-regexp url)
|
|||
|
=> (lambda (m)
|
|||
|
(url:make-ftp (match:substring m 2)
|
|||
|
(match:substring m 3)
|
|||
|
(cond ((match:substring m 5) => string->number)
|
|||
|
(else #f))
|
|||
|
(match:substring m 7))))
|
|||
|
|
|||
|
((regexp-exec mailto-regexp url)
|
|||
|
=> (lambda (m)
|
|||
|
(url:make-mailto (match:substring m 1))))
|
|||
|
|
|||
|
(else
|
|||
|
(url:make 'unknown url))))
|
|||
|
|
|||
|
|
|||
|
(define-public (url:unparse url)
|
|||
|
(define (pathy scheme username host port path)
|
|||
|
(string-append (symbol->string scheme)
|
|||
|
"://" host
|
|||
|
(if port (string-append ":" (number->string port))
|
|||
|
"")
|
|||
|
(if path (string-append "/" path)
|
|||
|
"")))
|
|||
|
(case (url:scheme url)
|
|||
|
((http) (pathy 'http #f
|
|||
|
(url:host url)
|
|||
|
(url:port url)
|
|||
|
(url:path url)))
|
|||
|
((ftp) (pathy 'ftp
|
|||
|
(url:user url)
|
|||
|
(url:host url)
|
|||
|
(url:port url)
|
|||
|
(url:path url)))
|
|||
|
((mailto) (string-append "mailto:" (url:address url)))
|
|||
|
((unknown) (url:unknown url))))
|
|||
|
|
|||
|
|
|||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|||
|
;; (url-decode STR)
|
|||
|
;; Turn + into space, and hex-encoded %XX strings into their
|
|||
|
;; eight-bit characters. Is a regexp faster than character
|
|||
|
;; scanning? Does it incur more overhead (which may be more
|
|||
|
;; important for code that frequently gets restarted)?
|
|||
|
|
|||
|
(define-public (url:decode str)
|
|||
|
(regexp-substitute/global #f "\\+|%([0-9A-Fa-f][0-9A-Fa-f])" str
|
|||
|
'pre
|
|||
|
(lambda (m)
|
|||
|
(cond ((string=? "+" (match:substring m 0)) " ")
|
|||
|
(else (integer->char
|
|||
|
(string->number
|
|||
|
(match:substring m 1)
|
|||
|
16)))))
|
|||
|
'post))
|
|||
|
|
|||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|||
|
;; (url-encode STR)
|
|||
|
;; The inverse of url-decode. Can't be done easily with
|
|||
|
;; a regexp: we would have to construct a regular expression
|
|||
|
;; like "[\277-\377]", for example, and Guile strings don't
|
|||
|
;; let you interpolate character literals. Pity.
|
|||
|
;; URL-encode any characters in STR that are not safe: these
|
|||
|
;; include any character not in the SAFE-CHARS list and any
|
|||
|
;; character that *is* in the RESERVED-CHARS list argument.
|
|||
|
|
|||
|
(define-public (url:encode str reserved-chars)
|
|||
|
(with-output-to-string
|
|||
|
(lambda ()
|
|||
|
(for-each (lambda (ch)
|
|||
|
(if (and (safe-char? ch)
|
|||
|
(not (memv ch reserved-chars)))
|
|||
|
(display ch)
|
|||
|
(begin
|
|||
|
(display #\%)
|
|||
|
(display (number->string (char->integer ch) 16)))))
|
|||
|
(string->list str)))))
|
|||
|
|
|||
|
(define safe-chars
|
|||
|
'(#\$ #\- #\_ #\. #\+ #\! #\* #\' #\( #\) #\, #\; #\/ #\? #\: #\@ #\& #\=))
|
|||
|
|
|||
|
(define (safe-char? ch)
|
|||
|
;; ``Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
|
|||
|
;; reserved characters used for their reserved purposes may be used
|
|||
|
;; unencoded within a URL.'' RFC 1738, #2.2.
|
|||
|
(or (char-alphabetic? ch)
|
|||
|
(char-numeric? ch)
|
|||
|
(memv ch safe-chars)))
|