/* -*- mode: c -*-
 * =======================================================================
 * Copyright (c) 2000-2001
 * Internet Initiative Japan Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *
 *      This product includes software developed by Internet
 *      Initiative Japan Inc. for use in the mod_encoding module
 *      for Apache.
 *
 * 4. Products derived from this software may not be called "mod_encoding"
 *    nor may "mod_encoding" appear in their names without prior written
 *    permission of Internet Initiative Japan Inc. For written permission,
 *    please contact tai@iij.ad.jp (Taisuke Yamada).
 *
 * 5. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *
 *      This product includes software developed by Internet
 *      Initiative Japan Inc. for use in the mod_encoding module
 *      for Apache (http://www.apache.org/).
 *
 * THIS SOFTWARE IS PROVIDED BY INTERNET INITIATIVE JAPAN INC. ``AS IS''
 * AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTERNET
 * INITIATIVE JAPAN INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * mod_encoding: This is a module to improve I18N filename
 * interoperability of mod_dav (and other HTTP-based protocols, maybe).
 *
 * It seems many WebDAV clients send filenames in their platform-local
 * encoding. But since mod_dav expects everything, even the HTTP request
 * line, to be in UTF-8, this causes an interoperability problem.
 *
 * I believe this is a future issue for specification (RFC?) to
 * standardize encoding used in HTTP request-line and HTTP header,
 * but life would be much easier if mod_dav can handle various
 * encodings sent by clients, TODAY. This module does just that.
 *
 * [Configuration]
 *
 * Here follows configuration example of this module.
 *
 *   <IfModule mod_encoding.c>
 *    EncodingEngine on
 *    SetServerEncoding UTF-8
 *
 *    AddClientEncoding SJIS   "Microsoft .* DAV"
 *    AddClientEncoding SJIS   "xdwin9x/"
 *    AddClientEncoding EUC-JP "cadaver/"
 *   </IfModule>
 *
 * The point is to register non-standard-compliant clients so
 * this module can detect which client input (= HTTP header) to
 * check and convert. You can use an extended regexp to
 * name the client.
 *
 * [TODO]
 *
 * There're times when you want charset other than UTF-8 for
 * local filesystem. Since mod_dav hardcodes UTF-8 as expected
 * filesystem encoding, this is not an easy fix (though in
 * principle, all you need to do is to convert output of readdir(3)).
 *
 * Also, when adding this feature, this module can no longer go
 * outside mod_dav, raising cost to keep up with mod_dav...
 *
 * @author  Taisuke Yamada <tai@iij.ad.jp>
 * @version $Revision: 1.1 $
 */

#include <httpd.h>
#include <http_config.h>
#include <http_core.h>
#include <http_log.h>
#include <http_protocol.h>
#include <http_request.h>

#include <iconv.h>

/*
 * MOD_ENCODING_DEBUG may be predefined by the build. Otherwise it
 * becomes 1 when DEBUG is defined and 0 when it is not.
 */
#ifndef MOD_ENCODING_DEBUG
#ifdef DEBUG
#define MOD_ENCODING_DEBUG 1
#else
#define MOD_ENCODING_DEBUG 0
#endif
#endif

/*
 * Execute expr only when MOD_ENCODING_DEBUG is non-zero.
 *
 * NOTE(review): this expands to a bare `if (...) { ... }` statement, so
 * an `else` written right after a DBG() call would bind to this `if`.
 */
#define DBG(expr) if (MOD_ENCODING_DEBUG) { expr; }

/*
 * Shorthand for ap_log_error() that supplies APLOG_MARK and ORs
 * APLOG_NOERRNO into the given level. The remaining arguments are the
 * format string and its values. Relies on the GNU named-variadic macro
 * syntax (`args...` / `##args`).
 */
#define LOG(level, server, args...) \
        ap_log_error(APLOG_MARK, APLOG_NOERRNO|level, server, ##args)

/**
 * module-local information storage structure
 *
 * One instance is created per server by config_setup() and filled in
 * by the directive handlers below.
 */
typedef struct {
  int           enable_function; /* flag to enable this module */
  char         *server_encoding; /* encoding the server side expects;
                                    used as the iconv target encoding */
  array_header *client_encoding; /* flat array of pointer pairs:
                                    [2n]   = encoding name (char *),
                                    [2n+1] = compiled User-Agent pattern
                                             (regex_t *) */
} encoding_config;

/*
 * Forward declaration: the directive handlers and the post-read hook
 * pass &encoding_module to ap_get_module_config() before the module
 * structure is defined at the end of this file.
 */
module MODULE_VAR_EXPORT encoding_module;

/***************************************************************************
 * utility methods
 ***************************************************************************/

/**
 * Converts encoding of the input string.
 *
 * The converted text is written to a buffer allocated from the request
 * pool. On any failure the ORIGINAL input pointer is returned untouched,
 * so callers can always use the result as a NUL-terminated string.
 *
 * @param r      Apache request object; supplies the pool for the output
 *               buffer and the server record used for logging.
 * @param cd     Conversion descriptor, made by iconv_open(3).
 * @param srcbuf Input string
 * @param srclen Length of the input string in bytes. Usually
 *               strlen(srcbuf).
 * @return       Newly allocated, NUL-terminated converted string, or
 *               srcbuf itself when the input is empty, the buffer cannot
 *               be allocated, or the conversion fails.
 */
static char *
iconv_string(request_rec *r, iconv_t cd, char *srcbuf, size_t srclen) {

  /* iconv(3) advances srcbuf as it consumes input, so remember where
   * the input started in order to hand it back intact on failure. */
  char   *origin = srcbuf;
  char   *outbuf, *marker;
  size_t  outlen;

  if (srclen == 0) {
    LOG(APLOG_DEBUG, r->server, "iconv_string: skipping zero-length input");
    return origin;
  }

  /* Refuse lengths for which "srclen * 4 + 1" would wrap around. */
  if (srclen > ((size_t)(-1) - 1) / 4) {
    LOG(APLOG_WARNING, r->server, "iconv_string: input too long");
    return origin;
  }

  /*
   * Allocate space for conversion. Note max bloat factor is 4 of UCS-4.
   * One extra byte is allocated but NOT offered to iconv, so that the
   * terminating NUL below always has room.
   */
  outlen = srclen * 4;
  marker = outbuf = (char *)ap_palloc(r->pool, outlen + 1);

  if (outbuf == NULL) {
    LOG(APLOG_WARNING, r->server, "iconv_string: no more memory");
    return origin;
  }

  /* Convert every character within input string. */
  while (srclen > 0) {
    if (iconv(cd, &srcbuf, &srclen, &outbuf, &outlen) == (size_t)(-1)) {
      LOG(APLOG_WARNING, r->server, "iconv_string: conversion error");
      /* Put the descriptor back into its initial state; the caller
       * reuses it for further strings. */
      iconv(cd, NULL, NULL, NULL, NULL);
      return origin;
    }
  }

  /* Everything done. Flush buffer/state and return result */
  iconv(cd, NULL, NULL, &outbuf, &outlen);
  iconv(cd, NULL, NULL,    NULL,    NULL);

  *outbuf = '\0';

  return marker;
}

/**
 * Normalize charset in HTTP request line and HTTP header(s).
 *
 * The unparsed request URI is URL-unescaped, converted with cd and
 * handed to ap_parse_uri() again. The same unescape-and-convert step is
 * then applied to each request header listed in keys[] that is present
 * (currently only "Destination"), and the header is replaced with the
 * converted value.
 *
 * @param  r Apache request object structure
 * @param cd Conversion descriptor, made by iconv_open(3).
 */
static void
iconv_header(request_rec *r, iconv_t cd) {

  /* Request headers whose values carry a path to be normalized. */
  static const char *const keys[] = { "Destination", NULL };
  const char *value;
  char       *buff;
  int         i;

  /* Normalize encoding in HTTP request line */
  ap_unescape_url(r->unparsed_uri);
  buff = iconv_string(r, cd, r->unparsed_uri, strlen(r->unparsed_uri));
  ap_parse_uri(r, buff);

  /* Normalize encoding in HTTP request header(s) */
  for (i = 0 ; keys[i] ; i++) {
    value = ap_table_get(r->headers_in, keys[i]);
    if (value == NULL) {
      continue;
    }

    /* ap_table_get() hands out a const pointer to the value; unescape a
     * private copy instead of writing through that pointer. */
    buff = ap_pstrdup(r->pool, value);
    ap_unescape_url(buff);
    buff = iconv_string(r, cd, buff, strlen(buff));
    ap_table_set(r->headers_in, keys[i], buff);
  }
}

/**
 * Return the encoding (defaults to "UTF-8") the named client
 * is expected to send.
 *
 * @param r      Apache request object; only used for logging.
 * @param encmap Flat array of (encoding name, compiled regex) pointer
 *               pairs, as built by the AddClientEncoding handler.
 * @param lookup Client identification string to match against the
 *               registered patterns; may be NULL.
 * @return       Encoding name of the first pair whose pattern matches
 *               lookup, or "UTF-8" when lookup is NULL or nothing
 *               matches.
 */
static const char *
get_client_encoding(request_rec *r,
            array_header *encmap, const char *lookup) {
  void **list = (void **)encmap->elts;
  int    i;

  LOG(APLOG_DEBUG, r->server, "get_client_encoding: entered");

  if (! lookup)
    return "UTF-8";

  LOG(APLOG_DEBUG, r->server, "get_client_encoding: lookup == %s", lookup);

  /* Entries come in pairs; never read past an incomplete trailing pair. */
  for (i = 0 ; i + 1 < encmap->nelts ; i += 2) {
    LOG(APLOG_DEBUG,
    r->server, "get_client_encoding: list[%d] == %s", i, (char *)list[i]);

    /* A NULL pattern means the regex could not be compiled at
     * configuration time; skip it rather than hand NULL to ap_regexec. */
    if (list[i + 1] == NULL) {
      continue;
    }

    if (ap_regexec((regex_t *)list[i + 1], lookup, 0, NULL, 0) == 0) {
      return (char *)list[i];
    }
  }
  return "UTF-8";
}

/**
 * Handler for "EncodingEngine" directive.
 *
 * Stores the on/off flag in this module's configuration record of the
 * server the directive applies to.
 *
 * @param parm Directive context; supplies the server record.
 * @param data Unused.
 * @param flag Flag value given with the directive.
 * @return     Always NULL (no error).
 */
static const char *
set_encoding_engine(cmd_parms *parm, void *data, int flag) {
  encoding_config *cfg =
    ap_get_module_config(parm->server->module_config, &encoding_module);

  cfg->enable_function = flag;
  return NULL;
}

/**
 * Handler for "SetServerEncoding" directive.
 *
 * Records a pool-allocated copy of the given encoding name as the
 * encoding expected on the server side.
 *
 * @param parm Directive context; supplies the server record and pool.
 * @param data Unused.
 * @param arg  Encoding name given with the directive.
 * @return     Always NULL (no error).
 */
static const char *
set_server_encoding(cmd_parms *parm, void *data, char *arg) {
  encoding_config *cfg =
    ap_get_module_config(parm->server->module_config, &encoding_module);

  cfg->server_encoding = ap_pstrdup(parm->pool, arg);
  return NULL;
}

/**
 * Handler for "AddClientEncoding" directive.
 *
 * This registers regex pattern of UserAgent: header and expected
 * encoding from that useragent. The two values are appended to the
 * client_encoding array as a (name, compiled regex) pair.
 *
 * @param parm Directive context; supplies the server record and pool.
 * @param data Unused.
 * @param key  Encoding name the matching client is expected to send.
 * @param val  Extended regular expression (matched case-insensitively)
 *             identifying the client.
 * @return     NULL on success, or an error message when val cannot be
 *             compiled as a regular expression.
 */
static const char *
add_client_encoding(cmd_parms *parm, void *data, char *key, char *val) {
  encoding_config *conf;
  regex_t         *regex;

  LOG(APLOG_DEBUG, parm->server, "add_client_encoding: entered");
  LOG(APLOG_DEBUG, parm->server, "add_client_encoding: key == %s", key);
  LOG(APLOG_DEBUG, parm->server, "add_client_encoding: val == %s", val);

  /* Compile first: a pattern that does not compile must be reported as
   * a configuration error instead of being stored as a NULL regex that
   * would later be passed to ap_regexec(). */
  regex = ap_pregcomp(parm->pool, val, REG_EXTENDED|REG_ICASE|REG_NOSUB);
  if (regex == NULL) {
    return ap_pstrcat(parm->pool,
                      "AddClientEncoding: cannot compile regular expression: ",
                      val, NULL);
  }

  conf = ap_get_module_config(parm->server->module_config, &encoding_module);

  /* Keep the array strictly paired: name first, then its pattern. */
  *(void **)ap_push_array(conf->client_encoding) =
    ap_pstrdup(parm->pool, key);
  *(void **)ap_push_array(conf->client_encoding) = regex;

  return NULL;
}

/***************************************************************************
 * module-unique command table
 ***************************************************************************/

/*
 * Configuration directives understood by this module. Each entry gives
 * the directive name, its handler, handler-private data (unused here),
 * where the directive is allowed, its argument form, and a usage string.
 *
 * NOTE(review): all three directives are registered with OR_FILEINFO,
 * yet every handler stores into the per-server configuration
 * (parm->server->module_config). Confirm whether allowing them outside
 * the server-level configuration is intended.
 */
static const command_rec mod_enc_commands[] = {
  /* Switch the module on or off. */
  {"EncodingEngine",
   set_encoding_engine, NULL,
   OR_FILEINFO, FLAG,  "Usage: EncodingEngine (on|off)"},

  /* Name of the encoding the server side expects. */
  {"SetServerEncoding",
   set_server_encoding, NULL,
   OR_FILEINFO, TAKE1, "Usage: SetServerEncoding <encname>"},

  /* Map a User-Agent pattern to the encoding that client sends. */
  {"AddClientEncoding",
   add_client_encoding, NULL,
   OR_FILEINFO, TAKE2, "Usage: AddClientEncoding <encname> <user-agent>"},

  /* Terminator entry. */
  {NULL}
};

/***************************************************************************
 * module methods
 ***************************************************************************/

/**
 * Set up the module's internal data structure.
 *
 * Allocates a zeroed configuration record from the given pool and fills
 * in the defaults: module enabled, server encoding "UTF-8", and an
 * empty client-encoding array.
 *
 * @param p Pool the record and its array are allocated from.
 * @param s Server record (unused).
 * @return  The new encoding_config record.
 */
static void *
config_setup(pool *p, server_rec *s) {

  encoding_config *cfg =
    (encoding_config *)ap_pcalloc(p, sizeof(encoding_config));

  cfg->client_encoding = ap_make_array(p, 2, sizeof(void *));
  cfg->server_encoding = "UTF-8";
  cfg->enable_function = 1;

  return cfg;
}

/**
 * Server config merger. Currently performs no real merge: the
 * overriding record is returned as-is and nothing is taken from base.
 *
 * @param p        Pool available for the merged record (unused).
 * @param base     Configuration being overridden (unused).
 * @param override Overriding configuration.
 * @return         override, unchanged.
 */
static void *
config_merge(pool *p, encoding_config *base, encoding_config *override) {
  encoding_config *effective = override;

  return effective;
}

/**
 * Hooked handler for post-read request.
 *
 * Here, expected encoding by client/server is determined, and
 * whenever needed, client input will be converted to that of
 * server-side expected encoding.
 *
 * The client encoding is looked up from the "User-Agent" request
 * header. Nothing is converted when the module is disabled or when
 * client and server encoding names are identical.
 *
 * @param r Apache request object structure
 * @return  Always DECLINED.
 */
static int
mod_enc_postread(request_rec *r) {

  encoding_config *conf;
  const char      *oenc, *ienc;
  iconv_t          cd;

  LOG(APLOG_DEBUG, r->server, "mod_enc_postread: entered");

  conf = (encoding_config *)
    ap_get_module_config(r->server->module_config, &encoding_module);

  if (! conf->enable_function) {
    return DECLINED;
  }

  oenc = conf->server_encoding;
  ienc = get_client_encoding(r, conf->client_encoding,
                 ap_table_get(r->headers_in, "User-Agent"));

  LOG(APLOG_DEBUG, r->server, "mod_enc_postread: ienc == %s", ienc);
  LOG(APLOG_DEBUG, r->server, "mod_enc_postread: oenc == %s", oenc);

  if (strcmp(ienc, oenc) == 0) {
    return DECLINED;
  }

  cd = iconv_open(oenc, ienc);
  if (cd == (iconv_t)(-1)) {
    /* Typically a misspelled or unsupported encoding name in the
     * configuration; report it instead of silently skipping the
     * conversion. The request itself is left untouched. */
    LOG(APLOG_WARNING, r->server,
        "mod_enc_postread: cannot convert from %s to %s", ienc, oenc);
    return DECLINED;
  }

  iconv_header(r, cd);
  iconv_close(cd);

  return DECLINED;
}

/***************************************************************************
 * exported module structure
 ***************************************************************************/

/*
 * Module dispatch table. This module supplies only a per-server
 * configuration (creator and merger), its directive table, and a
 * post-read-request hook; every other slot is left NULL.
 */
module MODULE_VAR_EXPORT encoding_module = {
  STANDARD_MODULE_STUFF,
  NULL,             /* initializer */
  NULL,             /* dir config */
  NULL,             /* dir config merger */
  config_setup,     /* server config */
  config_merge,     /* server config merger */
  mod_enc_commands, /* command table */
  NULL,             /* handlers */
  NULL,             /* filename translation */
  NULL,             /* check_user_id */
  NULL,             /* check auth */
  NULL,             /* check access */
  NULL,             /* type_checker */
  NULL,             /* fixups */
  NULL,             /* logger */
  NULL,             /* header parser */
  NULL,             /* child_init */
  NULL,             /* child_exit */
  mod_enc_postread, /* post read-request */
  /* Extra slots present only when built against an EAPI-enabled Apache. */
#ifdef EAPI
  NULL,             /* EAPI: add_module */
  NULL,             /* EAPI: remove_module */
  NULL,             /* EAPI: rewrite_command */
  NULL,             /* EAPI: new_connection */
#endif
};
