cXxSSKrSSKrSSKrSSKrSSKrSSKJr SSKJ r SSKJ r SSK J r J r "SS5r"SS \\R5r"S S \\R5r"S S \\R5r"SS\\R5r"SS\5r"SS\\R5r"SS\\R5r"SS\5r"SS\\R5r"SS\\R5r"SS\\R5r"SS\\R5r"S S!\5r"S"S#\\R5r"S$S%\\R5r"S&S'\\R5r"S(S)\\R5r "S*S+\\R5r!"S,S-\\R5r""S.S/\ 5r#\RH"\RJS05"S1S2\R55r&\RN"5"S3S4\R55r(\)S5:Xa\RT"5 gg)6N)support) socket_helper)threading_helper)BaseHTTPRequestHandler HTTPServercF\rSrSrSrSr/r/rSrSr Sr Sr Sr S r S rg) BaseRobotTest test_robotparserNc[R"UR5R5n[R R 5UlURRU5 gN) ioStringIO robots_txt readlinesurllib robotparserRobotFileParserparserparse)selfliness r?rrrArA.sJ  D ACrrAc&\rSrSrSr/SQrS/rSrg)CrawlDelayAndCustomAgentTest9z# robots.txt for http://www.example.com/ User-agent: * Crawl-delay: 1 Request-rate: 3/15 Disallow: /cyberworld/map/ # This is an infinite virtual URL space # Cybermapper knows where to go. User-agent: cybermapper Disallow: )rCrD) cybermapperrErEr?NrGr?rrrIrI9s J ND ' (CrrIc.\rSrSrSrSS/rS/rSS/rSrg ) SitemapTestJa# robots.txt for http://www.example.com/ User-agent: * Sitemap: http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xml Sitemap: http://www.google.com/hostednews/sitemap_index.xml Request-rate: 3/15 Disallow: /cyberworld/map/ # This is an infinite virtual URL space rCrDrEz7http://www.gstatic.com/s2/sitemaps/profiles-sitemap.xmlz2http://www.google.com/hostednews/sitemap_index.xmlr?N) r:r;r<r=rr(r0r6r>r?rrrMrMJs+ J  D ' (CJEGIrrMc$\rSrSrSr/r/SQrSrg)RejectAllRobotsTest[z(# go away User-agent: * Disallow: / )rErC/tmp/r?NrGr?rrrPrP[sJ D 6CrrPc"\rSrSrSrSrSrSrg)BaseRequestRateTesteNcURnURUR-GHnURU5up2UR X#S9 UR UR U5UR 5 URU5nUR X@R5 URbURU[RR5 UR URURR5 UR URURR5 SSS5 GM g!,(df  GM3=fr')rr(r0r$r)r5 crawl_delay request_rateassertIsInstancerr RequestRaterequestsseconds)rrr#r!parsed_request_rates rtest_request_rate%BaseRequestRateTest.test_request_rateis99txx'C//4JE#3  !3!3E!:Dr?rrrTrTesLKrrTc\rSrSrSrS/rSrg) EmptyFileTestr z/foor?N)r:r;r<r=rr(r>r?rrrarasJ 8Drracf\rSrSrSrSr\RRSS5r Sr S/r /SQr S r g ) CrawlDelayAndRequestRateTestzUser-agent: figtree Crawl-delay: 3 Request-rate: 9/30 Disallow: /tmp Disallow: /a%3cd.html Disallow: /a%2fb.html Disallow: /%7ejoe/index.html figtree )rfrF)/tmpz /tmp.html /tmp/a.html /a%3cd.html /a%3Cd.htmlz /a%2fb.htmlz/~joe/index.htmlr?N)r:r;r<r=rr!rrrZrXrWr(r0r>r?rrrdrds<J E%%11!R8LK $ %D .Crrdc\rSrSrSrSrg)DifferentAgentTestzFigTree Robot libwww-perl/5.04r?Nr:r;r<r=r!r>r?rrroros ,Erroc*\rSrSrSrS/r/SQrSrSrg)InvalidRequestRateTestzUser-agent: * Disallow: /tmp/ Disallow: /a%3Cd.html Disallow: /a/b.html Disallow: /%7ejoe/index.html Crawl-delay: 3 Request-rate: 9/banana rj)rRrkrlrmz /a/b.htmlz/%7Ejoe/index.htmlrir?N) r:r;r<r=rr(r0rWr>r?rrrsrssJ 8D !CKrrsc"\rSrSrSrS/r/rSrg)InvalidCrawlDelayTestz2User-Agent: * Disallow: /. Crawl-delay: pears rFr?NrGr?rrrvrvsJ =D Crrvc(\rSrSrSrSrS/rS/rSrg)AnotherInvalidRequestRateTestzeUser-agent: Googlebot Allow: /folder1/myfile.html Disallow: /folder1/ Request-rate: whale/banana Googlebot/folder1/myfile.html/folder1/anotherfile.htmlr?N r:r;r<r=rr!r(r0r>r?rrryrys J E " #D & 'Crryc"\rSrSrSrSrS/rSrg)UserAgentOrderingTestzMUser-agent: Googlebot Disallow: / User-agent: Googlebot-Mobile Allow: / r{z/something.jpgr?N)r:r;r<r=rr!r0r>r?rrrrsJ E  Crrc\rSrSrSrSrg)UserAgentGoogleMobileTestzGooglebot-Mobiler?Nrqr?rrrrs Errc(\rSrSrSrSrS/rS/rSrg)GoogleURLOrderingTestzJUser-agent: Googlebot Allow: /folder1/myfile.html Disallow: /folder1/ googlebotr|r}r?Nr~r?rrrrs J E " #D & 'Crrc$\rSrSrSrS/rS/rSrg)DisallowQueryStringTestz2User-agent: * Disallow: /some/path?name=value /some/pathz/some/path?name=valuer?NrGr?rrrrsJ >D " #Crrc$\rSrSrSrS/rS/rSrg)UseFirstUserAgentWildcardTestzNUser-agent: * Disallow: /some/path User-agent: * Disallow: /another/path z /another/pathrr?NrGr?rrrrsJ  D .Crrc$\rSrSrSrS/rS/rSrg)EmptyQueryStringTestz>User-agent: * Allow: /some/path? Disallow: /another/path? z /some/path?z/another/path?r?NrGr?rrrrsJ ?D  Crrcb\rSrSrSr\R RSS5rSr SS/r S/r S r g ) DefaultEntryTestizOUser-agent: * Crawl-delay: 1 Request-rate: 3/15 Disallow: /cyberworld/map/ rirCrDrEr?N) r:r;r<r=rrrrZrXrWr(r0r>r?rrrrs:J %%11!R8LK  D ' (Crrc"\rSrSrSrSrSrSrg)StringFormattingTestizUser-agent: * Crawl-delay: 1 Request-rate: 3/15 Disallow: /cyberworld/map/ # This is an infinite virtual URL space # Cybermapper knows where to go. User-agent: cybermapper Disallow: /some/path zxUser-agent: cybermapper Disallow: /some/path User-agent: * Crawl-delay: 1 Request-rate: 3/15 Disallow: /cyberworld/map/cbUR[UR5UR5 gr)r5strrexpected_outputr7s rtest_string_formatting+StringFormattingTest.test_string_formatting*s! T[[)4+?+?@rr?N)r:r;r<r=rrrr>r?rrrrs JOArrc \rSrSrSrSrSrg) RobotHandleri.c(URSS5 g)NizForbidden access) send_errorr7s rdo_GETRobotHandler.do_GET0s /0rcgrr?)rformatargss r log_messageRobotHandler.log_message3s rr?N)r:r;r<r=rrr>r?rrrr.s 1 rrz&Socket server requires working socket.cD\rSrSrSrSr\RS5rSr g)PasswordProtectedSiteTestCasei7cXUR[RR5 [ [ R S4[5Ul[R"SURRSS0S9Ul SURl URR5 g)NrzHTTPServer serving poll_intervalg{Gz?)nametargetkwargsT) addCleanuprrequest urlcleanuprrHOSTrserver threadingThread serve_forevertdaemonstartr7s rr#PasswordProtectedSiteTestCase.setUp=sw 112 -"4"4a!8,G !!%;;,,$D) +   rcURR5 URR5 URR 5 gr)rshutdownrjoin server_closer7s rtearDown&PasswordProtectedSiteTestCase.tearDownMs/      "rcDURRnS[R-S-[ US5-nUS-n[ R R5nURU5 UR5 URURSU55 g)Nzhttp://:rz /robots.txt*) rserver_addressrrrrrrset_urlreadr1r+)raddrr# robots_urlrs rtestPasswordProtectedSite7PasswordProtectedSiteTestCase.testPasswordProtectedSiteRs{{))-,,,s2Sa\A=( ##335s  ))#z:;r)rrN) r:r;r<r=rrr reap_threadsrr>r?rrrr7s&  # ""<#r?rrrr]s@,H)00:J  8C4rr__main__)+rrrunittesturllib.robotparserrtestr test.supportrr http.serverrrr TestCaserArIrMrPrTrardrorsrvryrrrrrrrrr skipUnlesshas_socket_supportrrequires_working_socketrr:mainr?rrrs% &):BBDBM8+<+<B)=(2C2C)"G-!2!2G"7-):):7-8'):): .#68I8I.$-5- ]H,=,=   M8+<+<   (M83D3D ( M8+<+<  5 (M8+<+< ($mX->->$ M83D3D =(*;*; )*H,=,= )A=(*;*;A4 )   ,