package HTTP::Micro; BEGIN { $HTTP::Micro::VERSION = '0.001'; } use strict; use warnings; # A stripped down version of HTTP::Tiny; but not a correct HTTP/1.1 # implementation use Carp (); my @attributes; BEGIN { @attributes = qw(agent default_headers timeout); no strict 'refs'; for my $accessor ( @attributes ) { *{$accessor} = sub { @_ > 1 ? $_[0]->{$accessor} = $_[1] : $_[0]->{$accessor}; }; } } sub new { my($class, %args) = @_; (my $agent = $class) =~ s{::}{-}g; my $self = { agent => $agent . "/" . ($class->VERSION || 0), timeout => 60, }; for my $key ( @attributes ) { $self->{$key} = $args{$key} if exists $args{$key} } return bless $self, $class; } sub request { my ($self, $method, $url, $args) = @_; @_ == 3 || (@_ == 4 && ref $args eq 'HASH') or Carp::croak(q/Usage: $http->request(METHOD, URL, [HASHREF])/); $args ||= {}; # we keep some state in this during _request # RFC 2616 Section 8.1.4 mandates a single retry on broken socket my $response; for ( 0 .. 1 ) { $response = eval { $self->_request($method, $url, $args) }; last unless $@ && $method eq 'GET' && $@ =~ m{^(?:Socket closed|Unexpected end)}; } if (my $e = "$@") { $response = { success => q{}, status => 599, reason => 'Internal Exception', content => $e, headers => { 'content-type' => 'text/plain', 'content-length' => length $e, } }; } return $response; } my %DefaultPort = ( http => 80, https => 443, ); sub _request { my ($self, $method, $url, $args) = @_; my ($scheme, $host, $port, $path_query) = $self->_split_url($url); my $request = { method => $method, scheme => $scheme, host_port => ($port == $DefaultPort{$scheme} ? $host : "$host:$port"), uri => $path_query, headers => {}, }; my $handle = HTTP::Micro::Handle->new(timeout => $self->{timeout}); $handle->connect($scheme, $host, $port); $self->_prepare_headers_and_cb($request, $args); $handle->write_request_header(@{$request}{qw/method uri headers/}); $handle->write_content_body($request) if $request->{content}; my $response; do { $response = $handle->read_response_header } until (substr($response->{status},0,1) ne '1'); if (!($method eq 'HEAD' || $response->{status} =~ /^[23]04/)) { $response->{content} = ''; $handle->read_content_body(sub { $_[1]->{content} .= $_[0] }, $response); } $handle->close; $response->{success} = substr($response->{status},0,1) eq '2'; return $response; } sub _prepare_headers_and_cb { my ($self, $request, $args) = @_; for ($self->{default_headers}, $args->{headers}) { next unless defined; while (my ($k, $v) = each %$_) { $request->{headers}{lc $k} = $v; } } $request->{headers}{'host'} = $request->{host_port}; $request->{headers}{'connection'} = "close"; $request->{headers}{'user-agent'} ||= $self->{agent}; if (defined $args->{content}) { $request->{headers}{'content-type'} ||= "application/octet-stream"; utf8::downgrade($args->{content}, 1) or Carp::croak(q/Wide character in request message body/); $request->{headers}{'content-length'} = length $args->{content}; } return; } sub _split_url { my $url = pop; # URI regex adapted from the URI module my ($scheme, $authority, $path_query) = $url =~ m<\A([^:/?#]+)://([^/?#]*)([^#]*)> or Carp::croak(qq/Cannot parse URL: '$url'/); $scheme = lc $scheme; $path_query = "/$path_query" unless $path_query =~ m<\A/>; my $host = (length($authority)) ? lc $authority : 'localhost'; $host =~ s/\A[^@]*@//; # userinfo my $port = do { $host =~ s/:([0-9]*)\z// && length $1 ? $1 : ($scheme eq 'http' ? 80 : $scheme eq 'https' ? 443 : undef); }; return ($scheme, $host, $port, $path_query); } package HTTP::Micro::Handle; # hide from PAUSE/indexers use strict; use warnings; use Carp qw[croak]; use Errno qw[EINTR EPIPE]; use IO::Socket qw[SOCK_STREAM]; sub BUFSIZE () { 32768 } my $Printable = sub { local $_ = shift; s/\r/\\r/g; s/\n/\\n/g; s/\t/\\t/g; s/([^\x20-\x7E])/sprintf('\\x%.2X', ord($1))/ge; $_; }; sub new { my ($class, %args) = @_; return bless { rbuf => '', timeout => 60, max_line_size => 16384, %args }, $class; } my $ssl_verify_args = { check_cn => "when_only", wildcards_in_alt => "anywhere", wildcards_in_cn => "anywhere" }; sub connect { @_ == 4 || croak(q/Usage: $handle->connect(scheme, host, port)/); my ($self, $scheme, $host, $port) = @_; if ( $scheme eq 'https' ) { eval "require IO::Socket::SSL" unless exists $INC{'IO/Socket/SSL.pm'}; croak(qq/IO::Socket::SSL must be installed for https support/) unless $INC{'IO/Socket/SSL.pm'}; } elsif ( $scheme ne 'http' ) { croak(qq/Unsupported URL scheme '$scheme'/); } $self->{fh} = 'IO::Socket::INET'->new( PeerHost => $host, PeerPort => $port, Proto => 'tcp', Type => SOCK_STREAM, Timeout => $self->{timeout} ) or croak(qq/Could not connect to '$host:$port': $@/); binmode($self->{fh}) or croak(qq/Could not binmode() socket: '$!'/); if ( $scheme eq 'https') { IO::Socket::SSL->start_SSL($self->{fh}); ref($self->{fh}) eq 'IO::Socket::SSL' or die(qq/SSL connection failed for $host\n/); $self->{fh}->verify_hostname( $host, $ssl_verify_args ) or die(qq/SSL certificate not valid for $host\n/); } $self->{host} = $host; $self->{port} = $port; return $self; } sub close { @_ == 1 || croak(q/Usage: $handle->close()/); my ($self) = @_; CORE::close($self->{fh}) or croak(qq/Could not close socket: '$!'/); } sub write { @_ == 2 || croak(q/Usage: $handle->write(buf)/); my ($self, $buf) = @_; my $len = length $buf; my $off = 0; local $SIG{PIPE} = 'IGNORE'; while () { $self->can_write or croak(q/Timed out while waiting for socket to become ready for writing/); my $r = syswrite($self->{fh}, $buf, $len, $off); if (defined $r) { $len -= $r; $off += $r; last unless $len > 0; } elsif ($! == EPIPE) { croak(qq/Socket closed by remote server: $!/); } elsif ($! != EINTR) { croak(qq/Could not write to socket: '$!'/); } } return $off; } sub read { @_ == 2 || @_ == 3 || croak(q/Usage: $handle->read(len)/); my ($self, $len) = @_; my $buf = ''; my $got = length $self->{rbuf}; if ($got) { my $take = ($got < $len) ? $got : $len; $buf = substr($self->{rbuf}, 0, $take, ''); $len -= $take; } while ($len > 0) { $self->can_read or croak(q/Timed out while waiting for socket to become ready for reading/); my $r = sysread($self->{fh}, $buf, $len, length $buf); if (defined $r) { last unless $r; $len -= $r; } elsif ($! != EINTR) { croak(qq/Could not read from socket: '$!'/); } } if ($len) { croak(q/Unexpected end of stream/); } return $buf; } sub readline { @_ == 1 || croak(q/Usage: $handle->readline()/); my ($self) = @_; while () { if ($self->{rbuf} =~ s/\A ([^\x0D\x0A]* \x0D?\x0A)//x) { return $1; } $self->can_read or croak(q/Timed out while waiting for socket to become ready for reading/); my $r = sysread($self->{fh}, $self->{rbuf}, BUFSIZE, length $self->{rbuf}); if (defined $r) { last unless $r; } elsif ($! != EINTR) { croak(qq/Could not read from socket: '$!'/); } } croak(q/Unexpected end of stream while looking for line/); } sub read_header_lines { @_ == 1 || @_ == 2 || croak(q/Usage: $handle->read_header_lines([headers])/); my ($self, $headers) = @_; $headers ||= {}; my $lines = 0; my $val; while () { my $line = $self->readline; if ($line =~ /\A ([^\x00-\x1F\x7F:]+) : [\x09\x20]* ([^\x0D\x0A]*)/x) { my ($field_name) = lc $1; $val = \($headers->{$field_name} = $2); } elsif ($line =~ /\A [\x09\x20]+ ([^\x0D\x0A]*)/x) { $val or croak(q/Unexpected header continuation line/); next unless length $1; $$val .= ' ' if length $$val; $$val .= $1; } elsif ($line =~ /\A \x0D?\x0A \z/x) { last; } else { croak(q/Malformed header line: / . $Printable->($line)); } } return $headers; } sub write_header_lines { (@_ == 2 && ref $_[1] eq 'HASH') || croak(q/Usage: $handle->write_header_lines(headers)/); my($self, $headers) = @_; my $buf = ''; while (my ($k, $v) = each %$headers) { my $field_name = lc $k; $field_name =~ /\A [\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]+ \z/x or croak(q/Invalid HTTP header field name: / . $Printable->($field_name)); $field_name =~ s/\b(\w)/\u$1/g; $buf .= "$field_name: $v\x0D\x0A"; } $buf .= "\x0D\x0A"; return $self->write($buf); } sub read_content_body { @_ == 3 || @_ == 4 || croak(q/Usage: $handle->read_content_body(callback, response, [read_length])/); my ($self, $cb, $response, $len) = @_; $len ||= $response->{headers}{'content-length'}; croak("No content-length in the returned response, and this " . "UA doesn't implement chunking") unless defined $len; while ($len > 0) { my $read = ($len > BUFSIZE) ? BUFSIZE : $len; $cb->($self->read($read), $response); $len -= $read; } return; } sub write_content_body { @_ == 2 || croak(q/Usage: $handle->write_content_body(request)/); my ($self, $request) = @_; my ($len, $content_length) = (0, $request->{headers}{'content-length'}); $len += $self->write($request->{content}); $len == $content_length or croak(qq/Content-Length missmatch (got: $len expected: $content_length)/); return $len; } sub read_response_header { @_ == 1 || croak(q/Usage: $handle->read_response_header()/); my ($self) = @_; my $line = $self->readline; $line =~ /\A (HTTP\/(0*\d+\.0*\d+)) [\x09\x20]+ ([0-9]{3}) [\x09\x20]+ ([^\x0D\x0A]*) \x0D?\x0A/x or croak(q/Malformed Status-Line: / . $Printable->($line)); my ($protocol, $version, $status, $reason) = ($1, $2, $3, $4); return { status => $status, reason => $reason, headers => $self->read_header_lines, protocol => $protocol, }; } sub write_request_header { @_ == 4 || croak(q/Usage: $handle->write_request_header(method, request_uri, headers)/); my ($self, $method, $request_uri, $headers) = @_; return $self->write("$method $request_uri HTTP/1.1\x0D\x0A") + $self->write_header_lines($headers); } sub _do_timeout { my ($self, $type, $timeout) = @_; $timeout = $self->{timeout} unless defined $timeout && $timeout >= 0; my $fd = fileno $self->{fh}; defined $fd && $fd >= 0 or croak(q/select(2): 'Bad file descriptor'/); my $initial = time; my $pending = $timeout; my $nfound; vec(my $fdset = '', $fd, 1) = 1; while () { $nfound = ($type eq 'read') ? select($fdset, undef, undef, $pending) : select(undef, $fdset, undef, $pending) ; if ($nfound == -1) { $! == EINTR or croak(qq/select(2): '$!'/); redo if !$timeout || ($pending = $timeout - (time - $initial)) > 0; $nfound = 0; } last; } $! = 0; return $nfound; } sub can_read { @_ == 1 || @_ == 2 || croak(q/Usage: $handle->can_read([timeout])/); my $self = shift; return $self->_do_timeout('read', @_) } sub can_write { @_ == 1 || @_ == 2 || croak(q/Usage: $handle->can_write([timeout])/); my $self = shift; return $self->_do_timeout('write', @_) } 1;