root/trunk/whisperlib/net/http/save_headers.py

Revision 7, 2.4 kB (checked in by whispercastorg, 2 years ago)

version 0.2.0

  • Property svn:executable set to
Line 
1 #!/usr/bin/python
2 #
3 # Copyright (c) 2009, Whispersoft s.r.l.
4 # All rights reserved.
5 #
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are
8 # met:
9 #
10 # * Redistributions of source code must retain the above copyright
11 # notice, this list of conditions and the following disclaimer.
12 # * Redistributions in binary form must reproduce the above
13 # copyright notice, this list of conditions and the following disclaimer
14 # in the documentation and/or other materials provided with the
15 # distribution.
16 # * Neither the name of Whispersoft s.r.l. nor the names of its
17 # contributors may be used to endorse or promote products derived from
18 # this software without specific prior written permission.
19 #
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #
32 import os
33 import sys
34
def process_wget_out(f, saved_lines):
    """Collect the request/response dumps from a wget debug log.

    The log is scanned line by line.  A line starting with
    '---request begin---' or '---response begin---' switches capturing
    on; a line starting with '---request end---' or '---response end---'
    switches it off.  Every other line seen while capturing is on is
    appended to `saved_lines`; the marker lines themselves are never
    stored.

    Args:
      f: path of the log file to read (as written by `wget -d -o`).
      saved_lines: list that is extended in place with the captured
        lines, line terminators included.

    Returns:
      None; the result is delivered through `saved_lines`.
    """
    begin_markers = ('---response begin---', '---request begin---')
    end_markers = ('---response end---', '---request end---')
    save_lines = False
    # The context manager closes the log even if reading fails part-way;
    # the file is rewritten for every URL, so handles must not pile up.
    with open(f) as log:
        for l in log:
            if l.startswith(begin_markers):
                save_lines = True
            elif l.startswith(end_markers):
                save_lines = False
            elif save_lines:
                saved_lines.append(l)
47
48
def get_headers(f, fout):
    """Fetch each URL listed in a file and save the HTTP headers exchanged.

    For every non-blank line of `f`, wget is run in debug mode with its
    log sent to /tmp/hdrs1 and the downloaded body sent to /tmp/xxx_out.
    The body is deleted right away; the log is passed to
    process_wget_out(), which extracts the request/response dumps.  Once
    all URLs are processed the collected lines are written to `fout` in
    a single call.

    Args:
      f: path of a text file holding one URL per line.  Surrounding
        whitespace is stripped and blank lines are skipped.
      fout: object with a write() method that receives the collected
        text.  It is not closed here.

    Returns:
      None.

    NOTE: the scratch paths under /tmp are fixed, so two concurrent runs
    would overwrite each other's files.
    """
    # Imported here so the block does not depend on an extra module-level
    # import being present in the file.
    import subprocess

    log_path = "/tmp/hdrs1"
    body_path = "/tmp/xxx_out"

    saved_lines = []
    with open(f) as url_file:
        # strip() returns a new string; the result must be kept, otherwise
        # the trailing newline ends up inside the URL handed to wget.
        urls = [line.strip() for line in url_file]

    for url in urls:
        if not url:
            # Nothing to fetch for an empty line.
            continue
        print("Getting: %s" % url)
        # An argument list (no shell) keeps quotes or other shell
        # metacharacters in a URL from being interpreted as commands.
        # "--" stops a URL that starts with "-" from being read as an option.
        subprocess.call(
            ["wget", "-d", "-o", log_path, "-O", body_path, "--", url])
        # Only the debug log matters; drop the body if wget produced one.
        if os.path.exists(body_path):
            os.unlink(body_path)
        process_wget_out(log_path, saved_lines)
    fout.write(''.join(saved_lines))
59
if __name__ == "__main__":
    # Usage: save_headers.py <url_list_file> <output_file>
    # Report a missing argument clearly instead of failing with IndexError.
    if len(sys.argv) < 3:
        sys.stderr.write(
            "Usage: %s <url_list_file> <output_file>\n" % sys.argv[0])
        sys.exit(1)
    # The context manager closes the output file even if a fetch fails.
    with open(sys.argv[2], "w") as fout:
        get_headers(sys.argv[1], fout)
Note: See TracBrowser for help on using the browser.