#!/usr/local/bin/perl -w
# $Id: ians,v 1.37.1.4 1999/04/19 20:12:42 brianr Exp $
$ourexefile=$0;
$version='$Revision: 1.37.1.4 $ ';
$version =~ m/\ (.*) /;
$version = "$1";
$fullversion = "IANS $1";
$urlctr = 0 ;
print "$$ [". time2str() ."] $version \n";
STDOUT->flush();
# Internet Alternate Namespace
# Copyright (C) 1998 Brian Ristuccia
# With contributions by Bennett Haselton
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# Where's the redirection form website?
$website="http://www.osiris.978.org/~brianr/ians/";
{
package RefFixer;
require HTML::Filter;
@ISA=qw(HTML::Filter);
sub output {
my $self = shift;
my $text = $_[0];
if ($self->{prohibtags}) {
# if ( $text =~ m/\<\! IANS-VERSION \>/ ) {
# push(@{$self->{fhtml}}, "*" . $main::fullversion);
# return;
# } #elsif ( $_[0] eq '' )
#push(@{$self->{fhtml}}, "");
1;
} else {
# Skip javascript and other cruft.
push(@{$self->{fhtml}}, $text);
}
}
sub filtered_html { join("", @{$_[0]->{fhtml}}) }
sub start {
my $self = shift;
my %props = %{$_[1]};
my @order = @{$_[2]};
my $fixurl;
my $altbase;
$self->{prohibtags}++ if $_[0] eq "script";
$_[0] = "X-$_[0]" if $_[0] eq "meta";
$compltext="<$_[0]";
foreach $prop (@order) {
if ( $prop =~ m/^on.+/ ) {
# No javascrit on* tags. Comment em out.
$props{$prop} = "// " . $props{$prop}
}
if ( $prop =~ m/(href|src|background|action)/ ) {
$fixurl=undef;
if ( ($props{$prop} =~ m/^[^:\/]*:/) ) {
# Reference like http://www.site.com/file/
$fixurl="$props{$prop}";
} elsif ($props{$prop} =~ m/^\// ) {
# root reference
$altbase = $main::base;
$altbase =~ m/([^:\/]*:\/\/[^\/]*)\//;
$fixurl="$1$props{$prop}";
} else {
#wicked relative
my $url1 = new URI::URL "$props{$prop}", "$main::base";
$fixurl=$url1->abs;
}
$fixurl = main::EncodeURL($fixurl);
if ($fixurl) { $props{$prop}="/$fixurl"; };
}
$compltext="$compltext $prop=\"$props{$prop}\"";
}
$compltext="$compltext\>";
$self->SUPER::start($_[0],$_[1],$_[2],$compltext);
}
sub end
{
my $self = shift;
$self->SUPER::end(@_);
$self->{prohibtags}-- if $_[0] eq "script";
}
}
use HTTP::Daemon;
use HTTP::Status;
use HTTP::Date;
use LWP::UserAgent;
use URI::URL;
use Net::hostent;
use POSIX ":sys_wait_h";
use MIME::Base64;
use IO::Socket;
use HTML::Entities ();
# Set up our user agent to snag stuff for us. We're doing this before we fork
# because the kernel can copy-on-write parts of the UA as neccessary much
# faster than perl can set up entire UA's.
$ua = new LWP::UserAgent;
# If our local squid is up, use it.
$ua->env_proxy();
# Set the UA string
$ua->agent("$fullversion " . $ua->agent);
# Discourage abuse
@abuse_strings = (
# PHF Exploit
'phf?(.*?)(passwd|group|htpasswd|xterm|rxvt)',
# Chargen provides a continuous stream of characters, and can be used
# to flood out network links or deplete resources on the proxy.
'^http://(.*?):(chargen|19)'
);
sub IsAbuseURL {
my($url) = @_;
foreach $string (@abuse_strings) {
if ($url =~ /$string/i) {
return 1;
}
}
return 0;
}
sub DecodeFragment {
print $urldsock "dec @_\n";
$baz=<$urldsock>;
chomp $baz;
return $baz;
}
sub EncodeFragment {
print $urldsock "enc @_\n";
$baz=<$urldsock>;
chomp $baz;
return $baz;
}
sub EncodeURL {
return EncodeFragment(@_);
#$originalurl;
}
sub DecodeURL {
my($originalurl) = @_;
# $encodedurl =~ s/(..)/chr(hex($1))/eg;
return DecodeFragment($originalurl);
#decode_base64($originalurl);
#return $originalurl;
}
# set up as a web server listening on whatever port
$d = new HTTP::Daemon(LocalPort => 8802,
Listen => 32,
Reuse => 1)
or die("Couldn't establish listening socket for HTTP server");
# Sometimes we have socket accepts that fail, but this doesn't mean it's a
# permenant failure condition. This keeps the program looping forever.
while (1) {
# Get a connection
while ($c = $d->accept) {
# Clean up any left over zombies
while (($deadpid= waitpid(-1,&WNOHANG)) > 0) { 1; }
# Now we fork off a separate copy of ourselves to read and process the
# Reques. This makes us immediately available to process other connections.
while ( !defined ($forkvalue = fork())) {
print "$$ [" . time2str . "] Couldn't fork -- collecting zombies, pausing, and then trying again.\n";
STDOUT->flush();
while (($deadpid= waitpid(-1,&WNOHANG)) > 0) { 1; }
sleep 1;
}
if ( $forkvalue eq "0" ) {
# Get a connection to the URL daemon.
while ( !defined ($urldsock = IO::Socket::INET->new('127.0.0.1:9000')) ) {
# Couldn't connect? Maybe it got hosed. Start another.
system('urld') or die "Couldn't start URL Daemon: $!";
}
# Try to resolve the remote machine's hostname
if ($remotehostname = gethost($c->peerhost)) {
$remotehostname=$remotehostname->name;
} else {
$remotehostname=$c->peerhost;
}
# sometimes we don't get a request right away, so set up the status
# indicator.
$0="$ourexefile $remotehostname (0)\000\000";
# Set a timeout on this socket so we can't get busied out forever. We
# don't set the timeout right away because resolving the host on its
# initial connection may take some time.
$c->timeout(32);
# Now we get the request. Note that since I implemented Keep-Alive, we
# can get multiple requests on the same connection.
while ($r = $c->get_request) {
# Set some information about our current status
$numberofconnections++;
$0="$ourexefile $remotehostname ($numberofconnections)\000\000";
# Display some information for the log file
print "$$ [" . time2str . "] Request: " . $r->url . " from " . $remotehostname . ":"
. $c->peerport . " : ";
# Look at the Host: header to determine the hostname we were
# accessed by, and use it in all redirects. We have no idea if
# we're being accessed through a port
# redirector, DNS alias, etc. If we have't been told, make a
# reasonable guess.
if (! ($myhost = $r->headers->header(Host))) {
$myhost= $c->sockhost() . ":" . $c->sockport();
}
$MYURL="http://" . $myhost. "/";
# Get the path that was requested from the server and remove any
# leading slash.
my $temppath = $r->url->full_path . "";
if ($temppath =~ s!^/via-another-proxy-from/([^/]+)/!!) {
# Perhaps this could be abused to make an attack appear to come
# from another host, but they could do that via proxy servers or
# the Via: header anyway, so it's no real loss.
$reallyfrom="$1, ";
#print "REALLY FROM $1 -- adjusted to $temppath -- \n";
}
$temppath =~ s/^\///;
#print "\n* ---". $temppath . "---\n";
if ($temppath eq "") {
# No URL? Show them a web site where they can enter one.
$r->url($website);
} else {
# Separate any query information from GET requests and separate it
# from the path we're about to decode.
if ( $temppath =~ m/(.*)(\?.*)/ ) {
$temppath = $1;
$querypart =$2;
} else {
$querypart = "";
}
# Decode the URL they gave us.
$temppath =DecodeURL($temppath);
#~ s/(..)/chr(hex($1))/eg;
# Now set the request URL to the decoded path plus any query
# information
$r->url ( $temppath . $querypart );
}
# And print the decoded and nicified URL into the log.
print $r->url . "\n";
# if ( $r->url eq "" ) {
# $r->url($website);
#} els
if ( &IsAbuseURL( $r->url ) ) {
# Someone thinks they're cool trying old-school phf lameness or other crap
$r->url ( $website . "abuse.html" );
} elsif ( ! ( $r->url =~ m/^(http|ftp):\/\// ) ) {
# Unsupported protocol (or perhaps something nasty like file://)
$r->url ( $website . "otherprot.cgi?url=" . $r->url );
}
# If our document has a referer header, then we need to decode it,
# because it's currently hexified.
if ( $referer = $r->headers->header(Referer) ) {
# strip us off the referer for sites that check.
if ( $referer =~ s/^$MYURL// ) {
$referer = DecodeURL($referer);
$r->headers->header(Referer => "$referer");
# print "\n*Referer is $referer\n";
#$referer =~ s/(..)/chr(hex($1))/eg;
}
}
# print "$$ Referer is fixed as " . $r->headers->header(Referer) . "\n";
$r->headers->remove_header(Content_Length);
$r->headers->remove_header(Host);
$browserhaskeepalive = $r->headers->header(Connection => 'Close');
# Inlcude valuable diagnostic information and discourage use as an anonymous
# means to defraud or vandalize remote sites.
#$via=$r->headers->header(Via);
if ( defined ($via = $r->headers->header(Via)) ) {
$via=$via.", ";
} else {
$via = "";
}
$r->headers->header(Via => $via . $r->protocol() . " $myhost ($fullversion request received from $remotehostname)");
;
if ( defined ($via=$r->headers->header(X_Forwarded_For)) ) {
$via=$via.", ";
} else {
$via="";
}
$r->headers->header(X_Forwarded_For => $via . $reallyfrom . $c->peerhost );
# show the request as a debug message
#print $r->as_string;
my $res = $ua->simple_request($r);
# print "$$ Got response\n";
if ($res->headers->header(Location)) {
# lets check before tacking our URL onto the header... What if we're
# already on there (kaboom!)
$location=$res->headers->header(Location);
if ( !($location =~ m/^$MYURL/) ) {
# encode and update the location if it's not already.
# print "$$ Encoding location $location\n";
$location = EncodeURL($location);
#~ s/(.)/sprintf("%02x", ord($1))/ge;
$res->headers->header(Location => "$MYURL" . "$location" );
}
}
# print "$$ Sent response\n";
if ( !($res->code =~ m/(400|500|501)/ ) ) {
if ( ( $res->headers->header(Content_Type) || '' ) eq "text/html" ) {
# we need to do some fixup here.
$base=$res->base;
# print "$$ content base is " . $base . "\n";
$res->add_content("
-");
$res->add_content("
");
#
Via Internet Alternate Namespace ' . $version . ' Copyright © 1998 '.
# 'Brian Ristuccia' .
# '
[visit another site]');
$p = RefFixer->new->parse($res->content);
$res->content($p->filtered_html);
# could trigger keyword blocking. Sorry.
#$res->add_content( '[view directly]'. "\n");
}
# Set the Content-Length so Keep-Alive doesn't break.
$res->headers->header(Content_Length => length($res->content()));
# If the browser had Keep-Alive, enable it.
$res->headers->header(Connection => $browserhaskeepalive);
# This header was returned by the server, and may have no basis
# in reality.
$res->headers->remove_header('Keep-Alive');
$c->send_response($res);
} else {
#if ($res->code == 400) {
# # Send user to 'Where do you want to go?' page.
# $setmeup = $website;
# $setmeup =~ s/(.)/sprintf("%02x", ord($1))/ge;
# $c->send_redirect("$MYURL" . $setmeup);
#}# else {
$c->send_error ($res->code, $res->error_as_HTML);
#}
}
$c->flush;
}
$c = undef;
exit;
} else {
# This is what we do if we're the parent of the fork()
$c = undef; # close connection
}
}
}