--- .gitignore | 2 +- Makefile.in | 3 + tools/linkcheckerrc | 272 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 276 insertions(+), 1 deletion(-) create mode 100644 tools/linkcheckerrc diff --git a/.gitignore b/.gitignore index d314cc7..1209aa3 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,4 @@ httpd/logs/*_log httpd/run/*.pid */out */data/templates/translations.html - +*/linkchecker-out.csv diff --git a/Makefile.in b/Makefile.in index 4251629..3091f6d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -84,6 +84,9 @@ stoptest: clean pid=`cat $(HTTPDDIR)/run/httpd.pid` && \ kill -TERM $$pid +linktest: + linkchecker -q -f ../tools/linkcheckerrc -v http://localhost:5000 + clean: rm -rf out rm -f po/*.mo diff --git a/tools/linkcheckerrc b/tools/linkcheckerrc new file mode 100644 index 0000000..a24b536 --- /dev/null +++ b/tools/linkcheckerrc @@ -0,0 +1,272 @@ +# Sample configuration file; see the linkcheckerrc(5) man page or +# execute linkchecker -h for help on these options. +# Commandline or GUI options override these settings. + +##################### output configuration ########################## +[output] +# enable debug messages; see 'linkchecker -h' for valid debug names +#debug=all +# print status output +#status=1 +# change the logging type +#log=xml +# turn on/off --verbose +#verbose=1 +# turn on/off --warnings +#warnings=0 +# turn on/off --quiet +quiet=1 +# additional file output +#fileoutput = text, html, gml, sql +fileoutput = csv + + +##################### logger configuration ########################## +# Note that the logger configuration is ignored by the linkchecker-gui +# program. Results in the GUI can be saved to a file with the command +# File -> Save results. 
+# +# logger output part names: +# all For all parts +# realurl The full url link +# result Valid or invalid, with messages +# extern 1 or 0, only in some logger types reported +# base <base href=...> +# name <a href=...>name</a> and <img alt="name"> +# parenturl The referrer URL if there is any +# info Some additional info, e.g. FTP welcome messages +# warning Warnings +# dltime Download time +# checktime Check time +# url The original url name, can be relative +# intro The blurb at the beginning, "starting at ..." +# outro The blurb at the end, "found x errors ..." +# stats Statistics including URL lengths and contents. + +# each Logger can have separate configuration parameters + +# standard text logger +[text] +#filename=linkchecker-out.txt +#parts=all +# colors for the various parts, syntax is <color> or <type>;<color> +# type can be bold, light, blink, invert +# color can be default, black, red, green, yellow, blue, purple, cyan, white, +# Black, Red, Green, Yellow, Blue, Purple, Cyan, White +#colorparent=white +#colorurl=default +#colorname=default +#colorreal=cyan +#colorbase=purple +#colorvalid=bold;green +#colorinvalid=bold;red +#colorinfo=default +#colorwarning=bold;yellow +#colordltime=default +#colorreset=default + +# GML logger +[gml] +#filename=linkchecker-out.gml +#parts=all +# valid encodings are listed in http://docs.python.org/library/codecs.html#standard-encodings +# default encoding is iso-8859-15 +#encoding=utf_16 + +# DOT logger +[dot] +#filename=linkchecker-out.dot +#parts=all +# default encoding is ascii since the original DOT format does not +# support other charsets +#encoding=iso-8859-15 + +# CSV logger +[csv] +#filename=linkchecker-out.csv +separator=, +quotechar=" +#parts=all + +# SQL logger +[sql] +#filename=linkchecker-out.sql +#dbname=linksdb +#separator=; +#parts=all + +# HTML logger +[html] +#filename=linkchecker-out.html +# colors for the various parts +#colorbackground=#fff7e5 +#colorurl=#dcd5cf +#colorborder=#000000 
+#colorlink=#191c83 +#colorwarning=#e0954e +#colorerror=#db4930 +#colorok=#3ba557 +#parts=all + +# blacklist logger +[blacklist] +#filename=~/.linkchecker/blacklist + +# custom xml logger +[xml] +#encoding=iso-8859-1 + +# GraphXML logger +[gxml] +#encoding=iso-8859-1 + +# Sitemap logger +[sitemap] +#priority=0.7 +#frequency=weekly + + +##################### checking options ########################## +[checking] +# number of threads +threads=100 +# connection timeout in seconds +timeout=10 +# Time to wait for checks to finish after the user aborts the first time +# (with Ctrl-C or the abort button). +#aborttimeout=300 +# The recursion level determines how many times links inside pages are followed. +#recursionlevel=1 +# Basic NNTP server. Overrides NNTP_SERVER environment variable. +#nntpserver= +# parse a cookiefile for initial cookie data +#cookiefile=/path/to/cookies.txt +# User-Agent header string to send to HTTP web servers +# Note that robots.txt are always checked with the original User-Agent. +useragent=Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) +# When checking finishes, write a memory dump to a temporary file. +# The memory dump is written both when checking finishes normally +# and when checking gets canceled. +# The memory dump only works if the python-meliae package is installed. +# Otherwise a warning is printed to install it. +#debugmemory=0 +# When checking absolute URLs inside local files, the given root directory +# is used as base URL. +# Note that the given directory must have URL syntax, so it must use a slash +# to join directories instead of a backslash. +# And the given directory must end with a slash. +# Unix example: +#localwebroot=/var/www/ +# Windows example: +#localwebroot=/C|/public_html/ +# Check SSL certificates. Set to an absolute pathname for a custom +# CA cert bundle to use. Set to zero to disable SSL certificate verification. +sslverify=0 +# Stop checking new URLs after the given number of seconds. 
Same as if the +# user hits Ctrl-C after X seconds. +#maxrunseconds=600 +# Maximum number of URLs to check. New URLs will not be queued after the +# given number of URLs is checked. +#maxnumurls=153 +# Maximum number of requests per second to one host. +#maxrequestspersecond=10 +# Allowed URL schemes as a comma-separated list. +#allowedschemes=http,https + +##################### filtering options ########################## +[filtering] +#ignore= +# ignore everything with 'lconline' in the URL name +# lconline +# and ignore everything with 'bookmark' in the URL name +# bookmark +# and ignore all mailto: URLs +# ^mailto: +# do not recurse into the following URLs + +#nofollow= +# just an example +# http://www\.example\.com/bla + +# Ignore specified warnings (see linkchecker -h for the list of +# recognized warnings). Add a comma-separated list of warnings here +# that prevent a valid URL from being logged. Note that the warning +# will be logged in invalid URLs. +#ignorewarnings=url-unicode-domain +# Regular expression to add more URLs recognized as internal links. +# Default is that URLs given on the command line are internal. +#internlinks=^http://www\.example\.net/ +# Check external links +checkextern=1 + + +##################### password authentication ########################## +[authentication] +# WARNING: if you store passwords in this configuration entry, make sure the +# configuration file is not readable by other users. +# Different user/password pairs for different URLs can be provided. +# Entries are a triple (URL regular expression, username, password), +# separated by whitespace. +# If the regular expression matches, the given user/password pair is used +# for authentication. The commandline options -u,-p match every link +# and therefore override the entries given here. The first match wins. +# At the moment, authentication is used for http[s] and ftp links. +#entry= +# Note that passwords are optional. 
If any passwords are stored here, +# this file should not be readable by other users. +# ^https?://www\.example\.com/~calvin/ calvin mypass +# ^ftp://www\.example\.com/secret/ calvin + +# if the website requires a login the URL and optionally the user and +# password CGI fieldnames can be provided. +#loginurl=http://www.example.com/ + +# The name of the user and password CGI field +#loginuserfield=login +#loginpasswordfield=password +# Optionally any additional CGI name/value pairs. Note that the default +# values are submitted automatically. +#loginextrafields= +# name1:value1 +# name 2:value 2 + +############################ Plugins ################################### +# +# uncomment sections to enable plugins + +# Check HTML anchors +#[AnchorCheck] + +# Print HTTP header info +#[HttpHeaderInfo] +# Comma separated list of header prefixes to print. +# The names are case insensitive. +# The default list is empty, so it should be non-empty when activating +# this plugin. +#prefixes=Server,X- + +# Add country info to URLs +#[LocationInfo] + +# Run W3C syntax checks +#[CssSyntaxCheck] +#[HtmlSyntaxCheck] + +# Search for regular expression in page contents +#[RegexCheck] +#warningregex=Oracle Error + +# Search for viruses in page contents +#[VirusCheck] +#clamavconf=/etc/clamav/clam.conf + +# Check that SSL certificates are at least the given number of days valid. +#[SslCertificateCheck] +#sslcertwarndays=14 + +# Parse and check links in PDF files +#[PdfParser] + +# Parse and check links in Word files +#[WordParser] + -- 2.4.3 -- websites mailing list websites@xxxxxxxxxxxxxxxxxxxxxxx https://admin.fedoraproject.org/mailman/listinfo/websites