###############################################################################
#                                                                             #
# Peekaboo Extended Email Attachment Behavior Observation Owl                 #
#                                                                             #
# 10-ask_peekaboo
###############################################################################
#                                                                             #
# Copyright (C) 2016-2020  science + computing ag                             #
#                                                                             #
# This program is free software: you can redistribute it and/or modify        #
# it under the terms of the GNU General Public License as published by        #
# the Free Software Foundation, either version 3 of the License, or (at       #
# your option) any later version.                                             #
#                                                                             #
# This program is distributed in the hope that it will be useful, but         #
# WITHOUT ANY WARRANTY; without even the implied warranty of                  #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU           #
# General Public License for more details.                                    #
#                                                                             #
# You should have received a copy of the GNU General Public License           #
# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
#                                                                             #
###############################################################################

use strict;

# define our own JSON-based protocol by plugging into AMaViS' AV control flow.
# Standard ask_daemon call graph:
#
# virus_scan (iterate over active virus scanners)
# -> ask_daemon (decide what protocol routine to use)
# -> run_av (iterate over files, run scanner and collect results)
# -> ask_daemon_internal (protocol routine: communicate with scanner)
#
# Our call graph:
#
# virus_scan
# -> ask_peekaboo (force our protocol routine)
# -> run_av
# -> ask_peekaboo_internal (override query to JSON)
# -> ask_daemon_internal
#
# See below for reasoning.

# use pure-perl implementation since performance should be enough for our needs
# and it's been a core module since 5.14, avoiding an additional dependency on
# JSON or JSON::XS
use JSON::PP;
# Yes, Amavis has its own minimalist JSON implementation Amavis::JSON. But it
# doesn't seem to do UTF-8 encoding correctly - everything ends up as latin1.

# Extend the Amavis Part package with a combined getter/setter for the
# Content-Disposition of a part. Called without a value it returns whatever is
# stored in the co_disp slot of the object, called with a value it stores that
# value there and returns it.
*Amavis::Unpackers::Part::content_disposition = sub {
  my $self = $_[0];

  # getter: no value supplied
  return $self->{co_disp} if @_ < 2;

  # setter: remember the value and hand it back
  return $self->{co_disp} = $_[1];
};

# Monkey patch a wrapper for mime_traverse into the MIME package. The wrapper
# runs the stock traversal and afterwards copies the Content-Disposition of
# the MIME entity just traversed onto the part objects created for it.
my $amavis_mime_traverse = \&Amavis::Unpackers::MIME::mime_traverse;
*{Amavis::Unpackers::MIME::mime_traverse} = sub ($$$$$) {
  my($entity, $tempdir, $parent_obj, $depth, $placement) = @_;

  # Remember how many children the parent already has. Whatever shows up
  # beyond that index after the call below was added while traversing this
  # very entity. Children that were there before belong to sibling entities
  # and must keep their own disposition instead of being overwritten with
  # ours.
  my $known = defined $parent_obj ? $parent_obj->children : undef;
  my $first_new = ref($known) eq 'ARRAY' ? scalar @$known : 0;

  $amavis_mime_traverse->($entity, $tempdir, $parent_obj, $depth, $placement);

  # nothing to annotate without a parent or if no new children appeared
  my $children = defined $parent_obj ? $parent_obj->children : undef;
  return unless ref($children) eq 'ARRAY' && $first_new < @$children;

  # the header lookup yields the same value for every new child, so do it
  # only once
  my $disposition = $entity->head->mime_attr('content-disposition');
  for my $idx ($first_new .. $#$children) {
    $children->[$idx]->content_disposition($disposition);
  }

  return;
};

# Protocol routine handed to run_av in place of ask_daemon_internal. It
# replaces the query with a JSON description of all parts to scan and then
# lets ask_daemon_internal do the actual communication.
#
# Arguments (only those used here, the rest is passed through untouched):
#   $_[0]  query as expanded by AMaViS - overwritten in place with our JSON
#          request
#   $_[2]  hash ref mapping part names to part objects
#
# Returns whatever Amavis::AV::ask_daemon_internal returns.
sub ask_peekaboo_internal {
  my (undef, undef, $names_to_parts) = @_;

  # extract some additional info for peekaboo from the parts objects
  # structure:
  # [ { "full_name" => "<path>", "name_declared" => ..., ... },
  #   { ... },
  #   ... ]
  # Part names are sorted so that identical input yields an identical request.
  my @metainfo;
  foreach my $partname (sort keys %{$names_to_parts}) {
    my $part = $names_to_parts->{$partname};
    my %pmi;

    for my $field (qw( full_name name_declared type_declared content_disposition )) {
      my $val = $part->$field();

      # name_declared can be an array of names, if so use the last
      $val = $val->[-1] if ref($val) eq 'ARRAY';

      # Do not transfer undef values since they are of no use to the
      # receiver. This check has to happen before the decoding below because
      # the string concatenation there would turn undef into an empty string
      # which would then be transferred as a bogus empty name.
      next unless defined $val;

      # when running under perl 5.22.1 e.g. on Ubuntu 16.04 safe_decode_mime
      # somehow corrupts the original value of $val so we may get errors in
      # totally unrelated code later on such as:
      #   FAILED: Malformed UTF-8 character (fatal) at /usr/sbin/amavisd-new
      #   line 10210.
      # We work around that by forcing a copy.
      $val = Amavis::Util::safe_decode_mime("" . $val) if $field eq 'name_declared';

      # be prepared for the decoder not handing back a usable value
      next unless defined $val;

      $pmi{$field} = $val;
    }

    push(@metainfo, \%pmi);
  }

  # format the whole thing as JSON, enforcing UTF-8 encoding and a stable key
  # order, and override the AMaViS-expanded query for standard
  # ask_daemon_internal to execute
  $_[0] = JSON::PP->new->utf8->canonical->encode(\@metainfo) . "\n";
  return Amavis::AV::ask_daemon_internal(@_);
}

# Entry point for the av scanner definition: hand control to run_av but make
# it use our own protocol routine. The final JSON request is built in that
# routine and not here because run_av expands the special placeholders {} and
# * in the query template. That would corrupt our JSON whenever it contains
# those sequences, be it an empty hash formatted as {} or simply a file name.
sub ask_peekaboo {
  # Work on a private copy of the argument list. Assigning to @_ directly
  # would modify the caller's variable so that the next invocation bypasses us
  # and ends up in _internal right away, which fails with messages like:
  # Peekaboo-Analysis av-scanner FAILED: Can't use string
  # ("/var/lib/amavis/tmp/amavis-20180"...) as a HASH ref while "strict refs"
  # in use at /etc/amavis/conf.d/10-ask_peekaboo line 61.
  my @patched_args = @_;

  # slot 4 holds the protocol routine run_av is going to call
  my $protocol_slot = 4;
  $patched_args[$protocol_slot] = \&ask_peekaboo_internal;

  return Amavis::AV::run_av(@patched_args);
}

1;  # ensure a defined return value
