pax_global_header00006660000000000000000000000064117664161610014523gustar00rootroot0000000000000052 comment=ba097ffeab88653fdd969c1e7c75182decfdbd73 ruby-http-parser-0.1.3/000077500000000000000000000000001176641616100147545ustar00rootroot00000000000000ruby-http-parser-0.1.3/.document000066400000000000000000000000741176641616100165740ustar00rootroot00000000000000README.rdoc lib/**/*.rb bin/* features/**/*.feature LICENSE ruby-http-parser-0.1.3/.gitignore000066400000000000000000000002301176641616100167370ustar00rootroot00000000000000## MAC OS .DS_Store ## TEXTMATE *.tmproj tmtags ## EMACS *~ \#* .\#* ## VIM *.swp ## PROJECT::GENERAL coverage rdoc pkg ## PROJECT::SPECIFIC *.rbc ruby-http-parser-0.1.3/LICENSE000066400000000000000000000020401176641616100157550ustar00rootroot00000000000000Copyright (c) 2009 Graham Batty Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ruby-http-parser-0.1.3/README.rdoc000066400000000000000000000042271176641616100165670ustar00rootroot00000000000000= http_parser This gem provides a (hopefully) high quality http parser library that can build request information iteratively as data comes over the line without requiring the caller to maintain the entire body of the request as a single string in memory. It will also have a full set of specs and a Ruby-native reference library so that it can be used in implementations or environments that do not support C extensions. Simple usage example: p = Http::Parser.new p.parse("GET / HTTP/1.1\r\n") p.parse("Host: blah.com\r\n") p.parse("Cookie: blorp=blah\r\n") p.parse("\r\n") p.method => "GET" p.version => [1,1] p.path => "/" p.headers["HOST"] => "blah.com" p.headers["COOKIE"] => "blorp=blah" If the request is a type that has a body, the body will be available as a stream object via p.body: p = Http::Parser.new p.parse("PUT / HTTP/1.1\r\n") p.parse("Host: blah.com\r\n") p.parse("Content-Type: text/text\r\n") p.parse("Content-Length: 5\r\n") p.parse("\r\n") p.parse("stuff") p.body.read => "stuff" If you use p.parse!, any trailing text that isn't immediately parseable will be left in the string object you pass in while what was parsed will be removed. This allows for you to hand the parser a large glob of data and allow it to figure out what it needs and what it doesn't. When you get more data, you can append it to your existing string and pass that in again until the request is done. You can test if the request is done by using p.done? p = Http::Parser.new s = "GET / HTTP/1.1\r\nHost:" p.parse!(s) s => "Host:" p.done? => false s << " blah.com\r\n" p.parse!(s) s => "" p.done? => false s << "\r\n" p.parse!(s) s => "" p.done? => true == Note on Patches/Pull Requests * Fork the project. * Make your feature addition or bug fix. * Add tests for it. This is important so I don't break it in a future version unintentionally. * Commit, do not mess with rakefile, version, or history. (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull) * Send me a pull request. Bonus points for topic branches. == Copyright Copyright (c) 2010 Graham Batty. See LICENSE for details. ruby-http-parser-0.1.3/Rakefile000066400000000000000000000027271176641616100164310ustar00rootroot00000000000000require 'rubygems' require 'rake' begin require 'jeweler' Jeweler::Tasks.new do |gem| gem.name = "http_parser" gem.summary = %Q{HTTP Parser Library} gem.description = %Q{This gem provides a (hopefully) high quality http parser library that can build request information iteratively as data comes over the line without requiring the caller to maintain the entire body of the request as a single string in memory.} gem.email = "graham@stormbrew.ca" gem.homepage = "http://github.com/stormbrew/http_parser" gem.authors = ["Graham Batty"] gem.add_development_dependency "rspec", ">= 1.2.9" # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings end Jeweler::GemcutterTasks.new rescue LoadError puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler" end require 'spec/rake/spectask' Spec::Rake::SpecTask.new(:spec) do |spec| spec.libs << 'lib' << 'spec' spec.spec_files = FileList['spec/**/*_spec.rb'] end Spec::Rake::SpecTask.new(:rcov) do |spec| spec.libs << 'lib' << 'spec' spec.pattern = 'spec/**/*_spec.rb' spec.rcov = true end task :spec => :check_dependencies task :default => :spec require 'rake/rdoctask' Rake::RDocTask.new do |rdoc| version = File.exist?('VERSION') ? File.read('VERSION') : "" rdoc.rdoc_dir = 'rdoc' rdoc.title = "http_parser #{version}" rdoc.rdoc_files.include('README*') rdoc.rdoc_files.include('lib/**/*.rb') end ruby-http-parser-0.1.3/VERSION000066400000000000000000000000061176641616100160200ustar00rootroot000000000000000.1.3 ruby-http-parser-0.1.3/bench/000077500000000000000000000000001176641616100160335ustar00rootroot00000000000000ruby-http-parser-0.1.3/bench/bench_helper.rb000066400000000000000000000002251176641616100207750ustar00rootroot00000000000000$LOAD_PATH.unshift(File.dirname(__FILE__)) $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) require 'rubygems' require 'benchmark'ruby-http-parser-0.1.3/bench/http_parser_bench.rb000066400000000000000000000012741176641616100220560ustar00rootroot00000000000000require File.expand_path(File.dirname(__FILE__) + '/bench_helper') require 'http/parser' request_body = < 10240, # maximum number of headers that can be passed to the server :max_headers => 100, # the size of the request body before it will be spilled # to a tempfile instead of being stored in memory. :min_tempfile_size => 1048576, # the class to use to create and manage the temporary file. # Must conform to the same interface as the stdlib Tempfile class :tempfile_class => Tempfile, } # Constants for method information MethodInfo = Struct.new(:must_have_body, :can_have_body) Methods = { "OPTIONS" => MethodInfo[false, true], "GET" => MethodInfo[false, false], "HEAD" => MethodInfo[false, false], "POST" => MethodInfo[true, true], "PUT" => MethodInfo[true, true], "DELETE" => MethodInfo[false, false], "TRACE" => MethodInfo[false, false], "CONNECT" => MethodInfo[false, false], } # Regex used to match the Request-Line RequestLineMatch = %r{^([a-zA-Z]+) (.+) HTTP/([0-9]+)\.([0-9]+)\r?\n} # Regex used to match a header line. Lines suspected of # being headers are also checked against the HeaderContinueMatch # to deal with multiline headers HeaderLineMatch = %r{^([a-zA-Z-]+):[ \t]*([[:print:]]+?)\r?\n} HeaderContinueMatch = %r{^[ \t]+([[:print:]]+?)\r?\n} EmptyLineMatch = %r{^\r?\n} # Regex used to match a size specification for a chunked segment ChunkSizeLineMatch = %r{^[0-9a-fA-F]+\r?\n} # Used as a fallback in error detection for a malformed request line or header. AnyLineMatch = %r{^.+?\r?\n} def initialize(options = DefaultOptions) @method = nil @path = nil @version = nil @headers = {} @body = nil @state = :request_line @options = DefaultOptions.merge(options) end # Returns true if the http method being parsed (if # known at this point in the parse) must have a body. # If the method hasn't been determined yet, returns false. def must_have_body? Methods[@method].must_have_body end # Returns true if the http method being parsed (if # known at this point in the parse) can have a body. # If the method hasn't been determined yet, returns false. def can_have_body? Methods[@method].can_have_body end # Returns true if the request has a body. def has_body? @body end # Takes a string and runs it through the parser. Note that # it does not consume anything it can't completely parse, so # you should always pass complete request chunks (lines or body data) # to this method. It's mostly for testing and convenience. # In practical use, you want to use parse!, which will remove parsed # data from the string you pass in. def parse(str) parse!(str.dup) end def parse_request_line(scanner) if (scanner.scan(RequestLineMatch)) @method = scanner[1] @path = scanner[2] @version = [scanner[3].to_i, scanner[4].to_i] @state = :headers if (!Methods[@method]) raise Http::ParserError::NotImplemented end elsif (scanner.scan(EmptyLineMatch)) # ignore an empty line before a request line. elsif (scanner.scan(AnyLineMatch)) raise Http::ParserError::BadRequest end end private :parse_request_line def parse_headers(scanner) if (scanner.scan(HeaderLineMatch)) header = normalize_header(scanner[1]) if (@headers[header]) @headers[header] << "," << scanner[2] else @headers[header] = scanner[2] end @last_header = header elsif (@last_header && scanner.scan(HeaderContinueMatch)) @headers[@last_header] << " " << scanner[1] elsif (scanner.scan(EmptyLineMatch)) req_has_body = @headers["CONTENT_LENGTH"] || @headers["TRANSFER_ENCODING"] if (req_has_body) if (@headers["TRANSFER_ENCODING"] && @headers["TRANSFER_ENCODING"] != 'identity') @state = :body_chunked @body_length = 0 # this will get updated as we go. @body_read = 0 @chunk_remain = nil elsif (@headers["CONTENT_LENGTH"]) @body_length = @headers["CONTENT_LENGTH"].to_i @body_read = 0 if (@body_length > 0) @state = :body_identity else @state = :done end end if (can_have_body?) if (@body_length >= @options[:min_tempfile_size]) @body = @options[:tempfile_class].new("http_parser") File.unlink(@body.to_path) rescue nil # ruby 1.9.1 does Tempfile.unlink wrong, so we do it ourselves. else @body = StringIO.new end else @body = nil end else if (must_have_body?) # we assume it has a body and the client just didn't tell us # how big it was. This is more useful than BadRequest. raise ParserError::LengthRequired else @state = :done end end elsif (scanner.scan(AnyLineMatch)) raise Http::ParserError::BadRequest end end private :parse_headers def parse_body_identity(scanner) remain = @body_length - @body_read addition = scanner.string[scanner.pos, remain] scanner.pos += addition.length @body_read += addition.length @body << addition if @body if (@body_read >= @body_length) @body.rewind if (@body) @state = :done end end private :parse_body_identity def parse_body_chunked(scanner) if (@chunk_remain) if (@chunk_remain > 0) addition = scanner.string[scanner.pos, @chunk_remain] scanner.pos += addition.length @chunk_remain -= addition.length @body_length += addition.length @body << addition if @body if (@body.length >= @options[:min_tempfile_size] && @body.kind_of?(StringIO)) @body_str = @body.string @body = @options[:tempfile_class].new("http_parser") File.unlink(@body.to_path) rescue nil # ruby 1.9.1 does Tempfile.unlink wrong, so we do it ourselves. @body << @body_str end else if (scanner.scan(EmptyLineMatch)) # the chunk is done. @chunk_remain = nil elsif (scanner.scan(AnyLineMatch)) # there was a line with stuff in it, # which is invalid here. raise ParserError::BadRequest end end elsif (scanner.scan(ChunkSizeLineMatch)) @chunk_remain = scanner[0].to_i(16) if (@chunk_remain < 1) @state = :body_chunked_tail end elsif (scanner.scan(AnyLineMatch)) raise ParserError::BadRequest end end private :parse_body_chunked def parse_body_chunked_tail(scanner) # It's not actually clear if tail headers are even # legal in a chunked request entity. The docs seem # to indicate that they should only be sent if the other # end is known to accept them, and there's no way to ensure # that when the client is the originator. As such, we'll # just ignore them for now. We'll do this by ignoring # any line until we hit an empty line, which will be treated # as the end of the entity. if (scanner.scan(EmptyLineMatch)) @state = :done @body.rewind elsif (scanner.scan(AnyLineMatch)) # ignore the line. end end private :parse_body_chunked_tail def parse_done(scanner) # do nothing, the parse is done. end private :parse_done # Consumes as much of str as it can and then removes it from str. This # allows you to iteratively pass data into the parser as it comes from # the client. def parse!(str) scanner = StringScanner.new(str) begin while (!scanner.eos?) start_pos = scanner.pos send(:"parse_#{@state}", scanner) if (scanner.pos == start_pos) # if we didn't move forward, we've run out of useful string so throw it back. return str end end ensure # clear out whatever we managed to scan. str[0, scanner.pos] = "" end end # Normalizes a header name to be UPPERCASE_WITH_UNDERSCORES def normalize_header(str) str.upcase.gsub('-', '_') end private :normalize_header # Given a basic rack environment, will properly fill it in # with the information gleaned from the parsed request. Note that # this only fills the subset that can be determined by the parser # library. Namely, the only rack. variable set is rack.input. You should also # have defaults in place for SERVER_NAME and SERVER_PORT, as they # are required. def fill_rack_env(env = {}) env["rack.input"] = @body || StringIO.new env["REQUEST_METHOD"] = @method env["SCRIPT_NAME"] = "" env["REQUEST_URI"] = @path env["PATH_INFO"], query = @path.split("?", 2) env["QUERY_STRING"] = query || "" if (@headers["HOST"] && !env["SERVER_NAME"]) env["SERVER_NAME"], port = @headers["HOST"].split(":", 2) env["SERVER_PORT"] = port if port end @headers.each do |key, val| if (key == 'CONTENT_LENGTH' || key == 'CONTENT_TYPE') env[key] = val else env["HTTP_#{key}"] = val end end return env end # Returns true if the request is completely done. def done? @state == :done end # Returns true if the request has parsed the request-line (GET / HTTP/1.1) def done_request_line? [:headers, :body_identity, :body_chunked, :body_chunked_tail, :done].include?(@state) end # Returns true if all the headers from the request have been consumed. def done_headers? [:body_identity, :body_chunked, :body_chunked_tail, :done].include?(@state) end # Returns true if the request's body has been consumed (really the same as done?) def done_body? done? end end endruby-http-parser-0.1.3/lib/http/parser.rb000066400000000000000000000024071176641616100203250ustar00rootroot00000000000000module Http require 'http/native_parser' begin require 'http/fast_parser' Parser = FastParser rescue LoadError => e Parser = NativeParser end # An exception class for HTTP parser errors. Includes # an HTTP Error Code number that corresponds to the # difficulty parsing (ie. 414 for Request-URI Too Long) class ParserError < RuntimeError # The error code that corresponds to the parsing error. attr_reader :code # Headers that should be sent back with the error reply as a hash. attr_reader :headers def initialize(string = "Bad Request", code = 400, headers = {}) super(string) @code = code @headers = headers end class BadRequest < ParserError; end class RequestTimeout < ParserError; def initialize(); super("Request Timeout", 408); end; end class LengthRequired < ParserError; def initialize(); super("Length Required", 411); end; end class RequestEntityTooLarge < ParserError; def initialize(); super("Request Entity Too Large", 413); end; end class RequestURITooLong < ParserError; def initialize(); super("Request-URI Too Long", 414); end; end class NotImplemented < ParserError; def initialize(); super("Method Not Implemented", 501); end; end # Send Allow header end endruby-http-parser-0.1.3/metadata.yml000066400000000000000000000036101176641616100172570ustar00rootroot00000000000000--- !ruby/object:Gem::Specification name: http_parser version: !ruby/object:Gem::Version prerelease: false segments: - 0 - 1 - 3 version: 0.1.3 platform: ruby authors: - Graham Batty autorequire: bindir: bin cert_chain: [] date: 2010-03-17 00:00:00 -06:00 default_executable: dependencies: - !ruby/object:Gem::Dependency name: rspec prerelease: false requirement: &id001 !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version segments: - 1 - 2 - 9 version: 1.2.9 type: :development version_requirements: *id001 description: |- This gem provides a (hopefully) high quality http parser library that can build request information iteratively as data comes over the line without requiring the caller to maintain the entire body of the request as a single string in memory. email: graham@stormbrew.ca executables: [] extensions: [] extra_rdoc_files: - LICENSE - README.rdoc files: - .document - .gitignore - LICENSE - README.rdoc - Rakefile - VERSION - bench/bench_helper.rb - bench/http_parser_bench.rb - lib/http/native_parser.rb - lib/http/parser.rb - spec/http_parser_spec.rb - spec/spec.opts - spec/spec_helper.rb has_rdoc: true homepage: http://github.com/stormbrew/http_parser licenses: [] post_install_message: rdoc_options: - --charset=UTF-8 require_paths: - lib required_ruby_version: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version segments: - 0 version: "0" required_rubygems_version: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version segments: - 0 version: "0" requirements: [] rubyforge_project: rubygems_version: 1.3.6 signing_key: specification_version: 3 summary: HTTP Parser Library test_files: - spec/http_parser_spec.rb - spec/spec_helper.rb ruby-http-parser-0.1.3/spec/000077500000000000000000000000001176641616100157065ustar00rootroot00000000000000ruby-http-parser-0.1.3/spec/http_parser_spec.rb000066400000000000000000000327421176641616100216100ustar00rootroot00000000000000require File.expand_path(File.dirname(__FILE__) + '/spec_helper') require 'http/parser' test_parsers = [Http::NativeParser] test_parsers << Http::FastParser if Http.const_defined? :FastParser describe Http::Parser do it "should be a reference to Http::NativeParser, or if present Http::FastParser" do Http.const_defined?(:Parser).should be_true if (Http.const_defined?(:FastParser)) Http::Parser.should == Http::FastParser else Http::Parser.should == Http::NativeParser end end end test_parsers.each do |parser| describe parser do it "should be able to parse a simple GET request" do p = parser.new p.parse("GET / HTTP/1.1\r\n") p.parse("Host: blah.com\r\n") p.parse("Cookie: blorp=blah\r\n") p.parse("\r\n") p.done?.should be_true p.method.should == "GET" p.version.should == [1,1] p.path.should == "/" p.headers["HOST"].should == "blah.com" p.headers["COOKIE"].should == "blorp=blah" end it "should raise an error on a malformed request line" do p = parser.new proc { p.parse("GET / HTTx/balh.blorp\r\n") }.should raise_error(Http::ParserError::BadRequest) proc { p.parse("GET HTTP/1.1\r\n") }.should raise_error(Http::ParserError::BadRequest) end it "should raise an error on a malformed header line" do p = parser.new p.parse("GET / HTTP/1.1\r\n") proc { p.parse("Stuff\r\n") }.should raise_error(Http::ParserError::BadRequest) end it "should be able to parse a request with a body defined by a Content-Length (ie. PUT)" do p = parser.new p.parse("PUT / HTTP/1.1\r\n") p.parse("Host: blah.com\r\n") p.parse("Content-Type: text/text\r\n") p.parse("Content-Length: 5\r\n") p.parse("\r\n") p.parse("stuff") p.body.read.should == "stuff" end describe "fill_rack_env" do it "should fill in a simple request correctly" do p = parser.new p.parse("GET /blah HTTP/1.1\r\nHost: blorp\r\n\r\n") p.done?.should be_true env = p.fill_rack_env env["rack.input"].should be_kind_of(StringIO) env["REQUEST_METHOD"].should == "GET" env["SCRIPT_NAME"].should == "" env["REQUEST_URI"].should == "/blah" env["PATH_INFO"].should == "/blah" env["QUERY_STRING"].should == "" env["SERVER_NAME"].should == "blorp" env["SERVER_PORT"].should be_nil env["HTTP_HOST"].should == "blorp" end it "should give Content-Type and Content-Length as CONTENT_* rather than HTTP_CONTENT_*" do p = parser.new p.parse("POST /blah HTTP/1.1\r\nContent-Type: text/text\r\nContent-Length: 4\r\n\r\ntest") p.done?.should be_true env = p.fill_rack_env env["CONTENT_LENGTH"].should == "4" env["CONTENT_TYPE"].should == "text/text" env["HTTP_CONTENT_LENGTH"].should be_nil env["HTTP_CONTENT_TYPE"].should be_nil end it "should split the query string from the request uri" do p = parser.new p.parse("GET /blah?blorp HTTP/1.1\r\nHost: blorp\r\n\r\n") p.done?.should be_true env = p.fill_rack_env env["REQUEST_URI"].should == "/blah?blorp" env["PATH_INFO"].should == "/blah" env["QUERY_STRING"].should == "blorp" end it "should split the query string from the path only once" do p = parser.new p.parse("GET /blah?blorp?bloop HTTP/1.1\r\nHost:blorp\r\n\r\n") p.done?.should be_true env = p.fill_rack_env env["REQUEST_URI"].should == "/blah?blorp?bloop" env["PATH_INFO"].should == "/blah" env["QUERY_STRING"].should == "blorp?bloop" end it "should split the host from the port when doing SERVER_NAME/SERVER_PORT" do p = parser.new p.parse("GET /blah HTTP/1.1\r\nHost: blorp.com:1234\r\n\r\n") p.done?.should be_true env = p.fill_rack_env env["SERVER_NAME"].should == "blorp.com" env["SERVER_PORT"].should == "1234" end it "should not fill in SERVER_NAME and SERVER_PORT if SERVER_NAME is already set" do p = parser.new p.parse("GET /blah HTTP/1.1\r\nHost: blah.com:324\r\n\r\n") p.done?.should be_true env = p.fill_rack_env({"SERVER_NAME"=>"woop.com"}) env["SERVER_NAME"].should == "woop.com" end end it "should be able to parse two simple requests from the same string" do req = < 1024) p.parse < 1024) p.parse <