uk.ac.gla.dcs.renaissance.util
Class WarcHTMLResponseRecord

java.lang.Object
  extended by uk.ac.gla.dcs.renaissance.util.WarcHTMLResponseRecord

public class WarcHTMLResponseRecord
extends Object


Constructor Summary
WarcHTMLResponseRecord()
          Default constructor
WarcHTMLResponseRecord(WarcHTMLResponseRecord o)
          Copy constructor
WarcHTMLResponseRecord(WarcRecord o)
          Constructs an HTML response record from a generic WARC record
 
Method Summary
 String getHTMLContent()
           
 WarcRecord getRawRecord()
           
 long getStartMarker()
          Gets the start marker, the position in the stream where the WARC record starts.
 long getStopMarker()
          Gets the stop marker, the position in the stream where the WARC record ends.
 int getStopMarkerDiff()
          Gets the difference between the start and stop marker
 String getTargetTrecID()
           
 String getTargetURI()
           
 Vector<String> getURLOutlinks()
          Gets a vector of normalized URLs (normalized to this target URI) of the outlinks of the page
 boolean isHTMLResponse()
          Tests if the underlying record is really an HTML response.
 void setRecord(WarcRecord o)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

WarcHTMLResponseRecord

public WarcHTMLResponseRecord()
Default constructor


WarcHTMLResponseRecord

public WarcHTMLResponseRecord(WarcHTMLResponseRecord o)
Copy constructor

Parameters:
o - the WarcHTMLResponseRecord to copy

WarcHTMLResponseRecord

public WarcHTMLResponseRecord(WarcRecord o)
Constructs an HTML response record from a generic WARC record

Parameters:
o - the generic WARC record to create this HTML response record from
Method Detail

setRecord

public void setRecord(WarcRecord o)

isHTMLResponse

public boolean isHTMLResponse()
Tests if the underlying record is really an HTML response.

Returns:
true if the record is an HTML response, false otherwise

getRawRecord

public WarcRecord getRawRecord()

getTargetURI

public String getTargetURI()

getTargetTrecID

public String getTargetTrecID()

getURLOutlinks

public Vector<String> getURLOutlinks()
Gets a vector of normalized URLs (normalized to this target URI) of the outlinks of the page

Returns:
a vector of the page's outlink URLs, normalized to this target URI

getStopMarkerDiff

public int getStopMarkerDiff()
Gets the difference between the start and stop marker

Returns:
the difference between start and stop marker or -1 if this value is undefined

getStopMarker

public long getStopMarker()
Gets the stop marker, the position in the stream where the WARC record ends.

Returns:
the stop marker or -1 if this value is undefined

getStartMarker

public long getStartMarker()
Gets the start marker, the position in the stream where the WARC record starts.

Returns:
the start marker or -1 if this value is undefined

getHTMLContent

public String getHTMLContent()


Copyright © 2011. All Rights Reserved.