No response when the document contains special characters
0 posts in topic
Flat View  Flat View
TOPIC ACTIONS:
 

Posted By:   vali_gouse
Posted On:   Wednesday, March 16, 2011 02:34 AM

Please help following issue: Not getting response when the document is having special chars(Use any doc with special char(ex: &, $, <, >,.....) TestErrorFour.doc Error message: System.FormatException: Invalid length for a Base-64 char array. at System.Convert.FromBase64String(String s) at Summarize.Summarizer.AccumulateBroadcast(String filedata, String givenWords) in c:DocumentSummarizerApp_CodeSummarizer.cs:line 66 Code: File:1 import java.io.BufferedReader; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.InputStre   More>>

Please help with the following issue: no response is returned when the document contains special characters. To reproduce, use any document containing special characters (e.g. &, $, <, >), such as TestErrorFour.doc.



Error message:

System.FormatException: Invalid length for a Base-64 char array. at
System.Convert.FromBase64String(String s) at
Summarize.Summarizer.AccumulateBroadcast(String filedata, String givenWords) in
c:\DocumentSummarizer\App_Code\Summarizer.cs:line 66



Code:

File:1

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.Properties;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.hwpf.*;
import org.apache.poi.hwpf.extractor.*;

import com.lowagie.text.Document;
import com.lowagie.text.pdf.PRTokeniser;
import com.lowagie.text.pdf.PdfReader;

public class DocumentSummarizerClient {
static Properties loadProperties() {
Properties prop = new Properties();
try {
prop.load(DocumentSummarizerClient.class.getClassLoader().getResourceAsStream("vep.properties"));

} catch (Exception ioe) {
ioe.printStackTrace();

}
return prop;
}

public String getSummary(String fileName,String noOfWordsOrPercentage ){
String summaryInputData ="";
String summarizedData="";
String summarizerURL = loadProperties().getProperty("Summarizer.serviceURL");
try {
String fileExtension=fileName.substring(fileName.lastIndexOf(".")+1, fileName.length());

if (fileExtension.equalsIgnoreCase("doc")|| fileExtension.equalsIgnoreCase("txt")|| fileExtension.equalsIgnoreCase("pdf")) {
if (fileExtension.equalsIgnoreCase("txt")) {
BufferedReader bufferedReader = new BufferedReader(
new FileReader(fileName));
String line = null;
while ((line = bufferedReader.readLine()) != null) {
summaryInputData += line;
}
}
if(fileExtension.equalsIgnoreCase("doc")){
POIFSFileSystem fs = null;
fs = new POIFSFileSystem(new FileInputStream(fileName));

HWPFDocument doc = new HWPFDocument(fs);

WordExtractor we = new WordExtractor(doc);

String[] paragraphs = we.getParagraphText();

for( int i=0; i
paragraphs[i] = paragraphs[i].replaceAll("\cM?
?
","");

summaryInputData+= paragraphs[i];
}

}
if(fileExtension.equalsIgnoreCase("pdf")){
Document document = new Document();
document.open();
PdfReader reader = new PdfReader(fileName);

int pageCount =reader.getNumberOfPages();
for(int i=1;i <=pageCount;i++){
byte[] bytes = reader.getPageContent(i);
PRTokeniser tokenizer = new PRTokeniser(bytes);
StringBuffer buffer = new StringBuffer();
while (tokenizer.nextToken()) {
if (tokenizer.getTokenType() == PRTokeniser.TK_STRING) {
buffer.append(tokenizer.getStringValue());
}
}
summaryInputData += buffer.toString();
}
}
}
else{
System.out.println("This is Invalid document. Presntly we support only text,word and PDF documents ");
}

// String encoded =new String (summaryInputData.getBytes("ISO-8859-1"),"UTF-8");
String encoded=Base64Utils.base64Encode(summaryInputData.getBytes());
// encoded =new String (summaryInputData.getBytes("ISO-8859-1"),"UTF-8");
String parameters= "base64String="+encoded+"&noOfWordsOrPercentage="+noOfWordsOrPercentage;
summarizedData= postRequest(parameters,summarizerURL);

String slength= " ";
if(summarizedData.contains(" ")){
summarizedData= summarizedData.substring(summarizedData.indexOf(slength)+slength.length(),summarizedData.indexOf(" "));
summarizedData = replaceVal(summarizedData);
//System.out.println(" <?xml version="1.0" encoding="utf-8"?> <![CDATA["+summarizedData+"]]>");
System.out.println("Summarized data "+summarizedData);
if(summarizedData.contains("Please enter the percentage")){
summarizedData="Data given cannot be summarized further";
}

}
else{
System.out.println("Data given cannot be summarized further");
summarizedData="";

}

} catch (FileNotFoundException e) {
return("The File is not found

"+e.toString());
} catch (IOException e) {
return("The File is already in use

"+e.toString());
} catch (Exception e) {
return(e.toString());
}

return summarizedData;

}

public static String postRequest(String parameters,String webServiceURL) throws Exception{
Properties systemSettings = System.getProperties();
systemSettings.put("http.proxyHost", loadProperties().getProperty("proxyHost"));
systemSettings.put("http.proxyPort", loadProperties().getProperty("proxyPort"));
System.setProperties(systemSettings);
String responseXML = "";

try {

URL url = new URL(webServiceURL);
URLConnection connection = url.openConnection();
HttpURLConnection httpConn = (HttpURLConnection) connection;
byte[] requestXML = parameters.getBytes();




httpConn.setRequestProperty("Content-Length", String
.valueOf(requestXML.length));
httpConn.setRequestProperty("Content-Type",
"application/x-www-form-urlencoded");

httpConn.setRequestMethod("POST");

httpConn.setDoOutput(true);
httpConn.setDoInput(true);


OutputStream out = httpConn.getOutputStream();
out.write(requestXML, 0, requestXML.length);
out.close();

InputStreamReader isr = new InputStreamReader(httpConn
.getInputStream());
BufferedReader br = new BufferedReader(isr);
String temp;
String tempResponse = null;


while ((temp = br.readLine()) != null)
tempResponse = tempResponse + temp;

responseXML = tempResponse;
br.close();
isr.close();
} catch (java.net.MalformedURLException e) {
System.out
.println("Error in postRequest(): Secure Service Required");
} catch (Exception e) {
System.out.println("Error in postRequest(): " + e.getMessage());
}
return responseXML;
}
public String replaceVal(String value) {
if (value == null) {
value = null;
}
value = value.replace(" <", " <");
value = value.replace(">", ">");
value = value.replace("&", "&");

return value;
}

public static void main(String[] args) {
DocumentSummarizerClient testdoc=new DocumentSummarizerClient();
System.out.println("hello");
testdoc.getSummary("C:\working_folder\vep\UnitTestCases\VEP1.0\DocumentSummarizerTestData\TestOne.txt","100%");

}

}


Note: To reproduce, use any document containing special characters (e.g. &, $, <, >), such as TestErrorFour.doc.

File 2:

/**
 * Base64 encoder/decoder using the alphabet specified in RFC-2045.
 *
 * <p>Encoded text is produced without line breaks; the decoder accepts only
 * alphabet characters followed by at most two '=' padding characters.
 */
public class Base64Utils {

    /** Maps a 6-bit value (0-63) to its Base64 character. */
    private static final char[] ENC_MAP =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();

    /** Maps an ASCII code to its 6-bit value, or -1 for characters outside the alphabet. */
    private static final byte[] DEC_MAP = new byte[128];

    /**
     * Class initializer. Builds the reverse lookup table for the Base64 alphabet.
     */
    static {
        for (int i = 0; i < DEC_MAP.length; i++) {
            DEC_MAP[i] = -1;
        }
        for (int i = 0; i < ENC_MAP.length; i++) {
            DEC_MAP[ENC_MAP[i]] = (byte) i;
        }
    }

    /**
     * This class isn't meant to be instantiated.
     */
    private Base64Utils() {
    }

    /**
     * Encodes the given byte[] using the Base64-encoding,
     * as specified in RFC-2045 (Section 6.8).
     *
     * @param aData the data to be encoded
     * @return the Base64-encoded aData, padded with '=' to a multiple of four characters
     * @exception IllegalArgumentException if NULL or empty array is passed
     */
    public static String base64Encode(byte[] aData) {
        if ((aData == null) || (aData.length == 0)) {
            throw new IllegalArgumentException("Can not encode NULL or empty byte array.");
        }

        char[] encoded = new char[((aData.length + 2) / 3) * 4];

        // 3-byte to 4-character conversion. Bytes are masked to 0-255 first so that
        // negative byte values cannot leak sign bits into the shifted results.
        int src = 0;
        int dest = 0;
        for (; src < aData.length - 2; src += 3) {
            int b0 = aData[src] & 0xFF;
            int b1 = aData[src + 1] & 0xFF;
            int b2 = aData[src + 2] & 0xFF;
            encoded[dest++] = ENC_MAP[b0 >>> 2];
            encoded[dest++] = ENC_MAP[((b0 << 4) & 0x3F) | (b1 >>> 4)];
            encoded[dest++] = ENC_MAP[((b1 << 2) & 0x3F) | (b2 >>> 6)];
            encoded[dest++] = ENC_MAP[b2 & 0x3F];
        }

        // Convert the last 1 or 2 bytes
        if (src < aData.length) {
            int b0 = aData[src] & 0xFF;
            encoded[dest++] = ENC_MAP[b0 >>> 2];
            if (src < aData.length - 1) {
                int b1 = aData[src + 1] & 0xFF;
                encoded[dest++] = ENC_MAP[((b0 << 4) & 0x3F) | (b1 >>> 4)];
                encoded[dest++] = ENC_MAP[(b1 << 2) & 0x3F];
            } else {
                encoded[dest++] = ENC_MAP[(b0 << 4) & 0x3F];
            }
        }

        // Add padding to the end of encoded data
        while (dest < encoded.length) {
            encoded[dest++] = '=';
        }

        return new String(encoded);
    }

    /**
     * Decodes the given Base64-encoded data,
     * as specified in RFC-2045 (Section 6.8).
     *
     * @param aData the Base64-encoded aData.
     * @return the decoded aData .
     * @exception IllegalArgumentException if NULL or empty data is passed, if the
     *            length is not a multiple of four, if more than two padding
     *            characters are present, or if a character outside the Base64
     *            alphabet is found
     */
    public static byte[] base64Decode(String aData) {
        if ((aData == null) || (aData.length() == 0)) {
            throw new IllegalArgumentException("Can not decode NULL or empty string.");
        }

        int length = aData.length();
        if (length % 4 != 0) {
            throw new IllegalArgumentException(
                    "Base64 data length must be a multiple of 4 but was " + length + ".");
        }

        // Skip padding from the end of encoded data
        int tail = length;
        while (tail > 0 && aData.charAt(tail - 1) == '=') {
            tail--;
        }
        if (length - tail > 2) {
            throw new IllegalArgumentException("Base64 data has more than two padding characters.");
        }

        // ASCII-printable to 0-63 conversion
        int[] sextets = new int[tail];
        for (int i = 0; i < tail; i++) {
            char c = aData.charAt(i);
            int value = (c < DEC_MAP.length) ? DEC_MAP[c] : -1;
            if (value < 0) {
                throw new IllegalArgumentException("Illegal Base64 character at index " + i + ".");
            }
            sextets[i] = value;
        }

        // Every 4 input characters yield 3 bytes, minus one byte per padding character.
        byte[] decoded = new byte[tail - length / 4];

        // 4-character to 3-byte conversion
        int src = 0;
        int dest = 0;
        for (; dest < decoded.length - 2; src += 4, dest += 3) {
            decoded[dest] = (byte) ((sextets[src] << 2) | (sextets[src + 1] >>> 4));
            decoded[dest + 1] = (byte) ((sextets[src + 1] << 4) | (sextets[src + 2] >>> 2));
            decoded[dest + 2] = (byte) ((sextets[src + 2] << 6) | sextets[src + 3]);
        }

        // Handle last 1 or 2 bytes
        if (dest < decoded.length) {
            decoded[dest] = (byte) ((sextets[src] << 2) | (sextets[src + 1] >>> 4));
        }
        if (++dest < decoded.length) {
            decoded[dest] = (byte) ((sextets[src + 1] << 4) | (sextets[src + 2] >>> 2));
        }

        return decoded;
    }

}

issue 2: Exception when passing 2MB .txt file

-----------------------------------------------------------Steps to reproduce:

Call getSummary() with 2MB .txt file

Actual:

The following exception has occurred:

-----------------------------------------------------------1. Error in postRequest(): Unexpected end of file from server
java.lang.NullPointerException

Please provide your precious feedback/suggestions.


Thanks in advance.

   <<Less
About | Sitemap | Contact