diff --git a/lib/HTTPMicro.pm b/lib/HTTPMicro.pm new file mode 100644 index 00000000..6673ae4f --- /dev/null +++ b/lib/HTTPMicro.pm @@ -0,0 +1,458 @@ +package HTTP::Micro; +BEGIN { + $HTTP::Micro::VERSION = '0.001'; +} +use strict; +use warnings; +# A stripped down version of HTTP::Tiny; but not a correct HTTP/1.1 +# implementation + +use Carp (); + + +my @attributes; +BEGIN { + @attributes = qw(agent default_headers timeout); + no strict 'refs'; + for my $accessor ( @attributes ) { + *{$accessor} = sub { + @_ > 1 ? $_[0]->{$accessor} = $_[1] : $_[0]->{$accessor}; + }; + } +} + +sub new { + my($class, %args) = @_; + (my $agent = $class) =~ s{::}{-}g; + my $self = { + agent => $agent . "/" . ($class->VERSION || 0), + timeout => 60, + }; + for my $key ( @attributes ) { + $self->{$key} = $args{$key} if exists $args{$key} + } + return bless $self, $class; +} + +sub request { + my ($self, $method, $url, $args) = @_; + @_ == 3 || (@_ == 4 && ref $args eq 'HASH') + or Carp::croak(q/Usage: $http->request(METHOD, URL, [HASHREF])/); + $args ||= {}; # we keep some state in this during _request + + # RFC 2616 Section 8.1.4 mandates a single retry on broken socket + my $response; + for ( 0 .. 1 ) { + $response = eval { $self->_request($method, $url, $args) }; + last unless $@ && $method eq 'GET' + && $@ =~ m{^(?:Socket closed|Unexpected end)}; + } + + if (my $e = "$@") { + $response = { + success => q{}, + status => 599, + reason => 'Internal Exception', + content => $e, + headers => { + 'content-type' => 'text/plain', + 'content-length' => length $e, + } + }; + } + return $response; +} + +my %DefaultPort = ( + http => 80, + https => 443, +); + +sub _request { + my ($self, $method, $url, $args) = @_; + + my ($scheme, $host, $port, $path_query) = $self->_split_url($url); + + my $request = { + method => $method, + scheme => $scheme, + host_port => ($port == $DefaultPort{$scheme} ? $host : "$host:$port"), + uri => $path_query, + headers => {}, + }; + + my $handle = HTTP::Micro::Handle->new(timeout => $self->{timeout}); + + $handle->connect($scheme, $host, $port); + + $self->_prepare_headers_and_cb($request, $args); + $handle->write_request_header(@{$request}{qw/method uri headers/}); + $handle->write_content_body($request) if $request->{content}; + + my $response; + do { $response = $handle->read_response_header } + until (substr($response->{status},0,1) ne '1'); + + if (!($method eq 'HEAD' || $response->{status} =~ /^[23]04/)) { + $response->{content} = ''; + $handle->read_content_body(sub { $_[1]->{content} .= $_[0] }, $response); + } + + $handle->close; + $response->{success} = substr($response->{status},0,1) eq '2'; + return $response; +} + +sub _prepare_headers_and_cb { + my ($self, $request, $args) = @_; + + for ($self->{default_headers}, $args->{headers}) { + next unless defined; + while (my ($k, $v) = each %$_) { + $request->{headers}{lc $k} = $v; + } + } + $request->{headers}{'host'} = $request->{host_port}; + $request->{headers}{'connection'} = "close"; + $request->{headers}{'user-agent'} ||= $self->{agent}; + + if (defined $args->{content}) { + $request->{headers}{'content-type'} ||= "application/octet-stream"; + utf8::downgrade($args->{content}, 1) + or Carp::croak(q/Wide character in request message body/); + $request->{headers}{'content-length'} = length $args->{content}; + } + return; +} + +sub _split_url { + my $url = pop; + + # URI regex adapted from the URI module + my ($scheme, $authority, $path_query) = $url =~ m<\A([^:/?#]+)://([^/?#]*)([^#]*)> + or Carp::croak(qq/Cannot parse URL: '$url'/); + + $scheme = lc $scheme; + $path_query = "/$path_query" unless $path_query =~ m<\A/>; + + my $host = (length($authority)) ? lc $authority : 'localhost'; + $host =~ s/\A[^@]*@//; # userinfo + my $port = do { + $host =~ s/:([0-9]*)\z// && length $1 + ? $1 + : ($scheme eq 'http' ? 80 : $scheme eq 'https' ? 443 : undef); + }; + + return ($scheme, $host, $port, $path_query); +} + +package + HTTP::Micro::Handle; # hide from PAUSE/indexers +use strict; +use warnings; + +use Carp qw[croak]; +use Errno qw[EINTR EPIPE]; +use IO::Socket qw[SOCK_STREAM]; + +sub BUFSIZE () { 32768 } + +my $Printable = sub { + local $_ = shift; + s/\r/\\r/g; + s/\n/\\n/g; + s/\t/\\t/g; + s/([^\x20-\x7E])/sprintf('\\x%.2X', ord($1))/ge; + $_; +}; + +sub new { + my ($class, %args) = @_; + return bless { + rbuf => '', + timeout => 60, + max_line_size => 16384, + %args + }, $class; +} + +my $ssl_verify_args = { + check_cn => "when_only", + wildcards_in_alt => "anywhere", + wildcards_in_cn => "anywhere" +}; + +sub connect { + @_ == 4 || croak(q/Usage: $handle->connect(scheme, host, port)/); + my ($self, $scheme, $host, $port) = @_; + + if ( $scheme eq 'https' ) { + eval "require IO::Socket::SSL" + unless exists $INC{'IO/Socket/SSL.pm'}; + croak(qq/IO::Socket::SSL must be installed for https support/) + unless $INC{'IO/Socket/SSL.pm'}; + } + elsif ( $scheme ne 'http' ) { + croak(qq/Unsupported URL scheme '$scheme'/); + } + + $self->{fh} = 'IO::Socket::INET'->new( + PeerHost => $host, + PeerPort => $port, + Proto => 'tcp', + Type => SOCK_STREAM, + Timeout => $self->{timeout} + ) or croak(qq/Could not connect to '$host:$port': $@/); + + binmode($self->{fh}) + or croak(qq/Could not binmode() socket: '$!'/); + + if ( $scheme eq 'https') { + IO::Socket::SSL->start_SSL($self->{fh}); + ref($self->{fh}) eq 'IO::Socket::SSL' + or die(qq/SSL connection failed for $host\n/); + $self->{fh}->verify_hostname( $host, $ssl_verify_args ) + or die(qq/SSL certificate not valid for $host\n/); + } + + $self->{host} = $host; + $self->{port} = $port; + + return $self; +} + +sub close { + @_ == 1 || croak(q/Usage: $handle->close()/); + my ($self) = @_; + CORE::close($self->{fh}) + or croak(qq/Could not close socket: '$!'/); +} + +sub write { + @_ == 2 || croak(q/Usage: $handle->write(buf)/); + my ($self, $buf) = @_; + + my $len = length $buf; + my $off = 0; + + local $SIG{PIPE} = 'IGNORE'; + + while () { + $self->can_write + or croak(q/Timed out while waiting for socket to become ready for writing/); + my $r = syswrite($self->{fh}, $buf, $len, $off); + if (defined $r) { + $len -= $r; + $off += $r; + last unless $len > 0; + } + elsif ($! == EPIPE) { + croak(qq/Socket closed by remote server: $!/); + } + elsif ($! != EINTR) { + croak(qq/Could not write to socket: '$!'/); + } + } + return $off; +} + +sub read { + @_ == 2 || @_ == 3 || croak(q/Usage: $handle->read(len)/); + my ($self, $len) = @_; + + my $buf = ''; + my $got = length $self->{rbuf}; + + if ($got) { + my $take = ($got < $len) ? $got : $len; + $buf = substr($self->{rbuf}, 0, $take, ''); + $len -= $take; + } + + while ($len > 0) { + $self->can_read + or croak(q/Timed out while waiting for socket to become ready for reading/); + my $r = sysread($self->{fh}, $buf, $len, length $buf); + if (defined $r) { + last unless $r; + $len -= $r; + } + elsif ($! != EINTR) { + croak(qq/Could not read from socket: '$!'/); + } + } + if ($len) { + croak(q/Unexpected end of stream/); + } + return $buf; +} + +sub readline { + @_ == 1 || croak(q/Usage: $handle->readline()/); + my ($self) = @_; + + while () { + if ($self->{rbuf} =~ s/\A ([^\x0D\x0A]* \x0D?\x0A)//x) { + return $1; + } + $self->can_read + or croak(q/Timed out while waiting for socket to become ready for reading/); + my $r = sysread($self->{fh}, $self->{rbuf}, BUFSIZE, length $self->{rbuf}); + if (defined $r) { + last unless $r; + } + elsif ($! != EINTR) { + croak(qq/Could not read from socket: '$!'/); + } + } + croak(q/Unexpected end of stream while looking for line/); +} + +sub read_header_lines { + @_ == 1 || @_ == 2 || croak(q/Usage: $handle->read_header_lines([headers])/); + my ($self, $headers) = @_; + $headers ||= {}; + my $lines = 0; + my $val; + + while () { + my $line = $self->readline; + + if ($line =~ /\A ([^\x00-\x1F\x7F:]+) : [\x09\x20]* ([^\x0D\x0A]*)/x) { + my ($field_name) = lc $1; + $val = \($headers->{$field_name} = $2); + } + elsif ($line =~ /\A [\x09\x20]+ ([^\x0D\x0A]*)/x) { + $val + or croak(q/Unexpected header continuation line/); + next unless length $1; + $$val .= ' ' if length $$val; + $$val .= $1; + } + elsif ($line =~ /\A \x0D?\x0A \z/x) { + last; + } + else { + croak(q/Malformed header line: / . $Printable->($line)); + } + } + return $headers; +} + +sub write_header_lines { + (@_ == 2 && ref $_[1] eq 'HASH') || croak(q/Usage: $handle->write_header_lines(headers)/); + my($self, $headers) = @_; + + my $buf = ''; + while (my ($k, $v) = each %$headers) { + my $field_name = lc $k; + $field_name =~ /\A [\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]+ \z/x + or croak(q/Invalid HTTP header field name: / . $Printable->($field_name)); + $field_name =~ s/\b(\w)/\u$1/g; + $buf .= "$field_name: $v\x0D\x0A"; + } + $buf .= "\x0D\x0A"; + return $self->write($buf); +} + +sub read_content_body { + @_ == 3 || @_ == 4 || croak(q/Usage: $handle->read_content_body(callback, response, [read_length])/); + my ($self, $cb, $response, $len) = @_; + $len ||= $response->{headers}{'content-length'}; + + croak("No content-length in the returned response, and this " + . "UA doesn't implement chunking") unless defined $len; + + while ($len > 0) { + my $read = ($len > BUFSIZE) ? BUFSIZE : $len; + $cb->($self->read($read), $response); + $len -= $read; + } + + return; +} + +sub write_content_body { + @_ == 2 || croak(q/Usage: $handle->write_content_body(request)/); + my ($self, $request) = @_; + my ($len, $content_length) = (0, $request->{headers}{'content-length'}); + + $len += $self->write($request->{content}); + + $len == $content_length + or croak(qq/Content-Length missmatch (got: $len expected: $content_length)/); + + return $len; +} + +sub read_response_header { + @_ == 1 || croak(q/Usage: $handle->read_response_header()/); + my ($self) = @_; + + my $line = $self->readline; + + $line =~ /\A (HTTP\/(0*\d+\.0*\d+)) [\x09\x20]+ ([0-9]{3}) [\x09\x20]+ ([^\x0D\x0A]*) \x0D?\x0A/x + or croak(q/Malformed Status-Line: / . $Printable->($line)); + + my ($protocol, $version, $status, $reason) = ($1, $2, $3, $4); + + return { + status => $status, + reason => $reason, + headers => $self->read_header_lines, + protocol => $protocol, + }; +} + +sub write_request_header { + @_ == 4 || croak(q/Usage: $handle->write_request_header(method, request_uri, headers)/); + my ($self, $method, $request_uri, $headers) = @_; + + return $self->write("$method $request_uri HTTP/1.1\x0D\x0A") + + $self->write_header_lines($headers); +} + +sub _do_timeout { + my ($self, $type, $timeout) = @_; + $timeout = $self->{timeout} + unless defined $timeout && $timeout >= 0; + + my $fd = fileno $self->{fh}; + defined $fd && $fd >= 0 + or croak(q/select(2): 'Bad file descriptor'/); + + my $initial = time; + my $pending = $timeout; + my $nfound; + + vec(my $fdset = '', $fd, 1) = 1; + + while () { + $nfound = ($type eq 'read') + ? select($fdset, undef, undef, $pending) + : select(undef, $fdset, undef, $pending) ; + if ($nfound == -1) { + $! == EINTR + or croak(qq/select(2): '$!'/); + redo if !$timeout || ($pending = $timeout - (time - $initial)) > 0; + $nfound = 0; + } + last; + } + $! = 0; + return $nfound; +} + +sub can_read { + @_ == 1 || @_ == 2 || croak(q/Usage: $handle->can_read([timeout])/); + my $self = shift; + return $self->_do_timeout('read', @_) +} + +sub can_write { + @_ == 1 || @_ == 2 || croak(q/Usage: $handle->can_write([timeout])/); + my $self = shift; + return $self->_do_timeout('write', @_) +} + +1; diff --git a/t/lib/HTTPMicro.t b/t/lib/HTTPMicro.t new file mode 100644 index 00000000..ec0b7184 --- /dev/null +++ b/t/lib/HTTPMicro.t @@ -0,0 +1,27 @@ +#!/usr/bin/perl + +BEGIN { + die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" + unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; + unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; +}; + +use strict; +use warnings FATAL => 'all'; +use English qw(-no_match_vars); +use Test::More; + +use HTTP::Tiny; +use HTTPMicro; + +my $test_url = "http://www.google.com"; +my $tiny = HTTP::Tiny->new(max_redirect => 0)->request('GET', $test_url); +my $micro = HTTP::Micro->new->request('GET', $test_url); + +is_deeply( + $micro->{content}, + $tiny->{content}, + "HTTP::Micro behaves like HTTP::Tiny (with max_redirect) for $test_url" +); + +done_testing;