commit - 56f7be9a447392598b27296e90f5ba238f2a1854
commit + ad1e9cfad75fabce65f5394e84f63d6f5c5b7a93
blob - c2cf4c97f8cdd1dce821545b00610cc60446438e
blob + ab7eeb7aadac5994cbc89655916a2db9543d7f1c
--- src/main/scala/twimpact/jsonv/Main.scala
+++ src/main/scala/twimpact/jsonv/Main.scala
import net.minidev.json.{JSONArray, JSONObject}
import java.io._
import java.util.zip.GZIPInputStream
+import io.Source
/**
* >>Describe Class<<
private def usage(message: Option[String] = None) {
message.foreach(error(_))
error("usage: jsonv [-i] <dump>")
- error(" jvonv [-csv|-tsv] <fields> <dump>")
+ error(" jvonv [-csv|-tsv] [-f] <fields> <dump>")
error("")
error("Start by looking up the fields info from the dump using -i.")
error("Then dump your information in CSV or TSV format by providing the")
System.exit(0)
}
- private def getFileReader(dump: File): BufferedReader = {
- dump match {
+ private def getFileReader(dump: File, readFully: Boolean): BufferedReader = {
+ val is = dump match {
case f if (f.getName.endsWith("gz")) =>
- new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(f))))
+ new GZIPInputStream(new FileInputStream(f))
case f =>
- new BufferedReader(new InputStreamReader(new FileInputStream(f)))
+ new FileInputStream(f)
}
+ if(readFully) {
+ new BufferedReader(new StringReader(Source.fromInputStream(is).mkString("").replaceAll("[\r\n]+", "")))
+ } else {
+ new BufferedReader(new InputStreamReader(is))
+ }
+
}
val SEPARATOR = "sep"
info("jsonv - (c) 2012 Matthias L. Jugel")
if (args.length == 0) usage()
var printInfo = false
+ var readFully = false
var settings: Map[String, String] = Map(SEPARATOR -> "\t")
var input: List[String] = Nil
args.foreach {
settings += (SEPARATOR -> ";")
case "-tsv" =>
settings += (SEPARATOR -> "\t")
+ case "-f" =>
+ readFully = true
case i =>
input = i :: input
}
if (printInfo) {
info("Dumping field information from %s".format(dump))
- val r = getFileReader(dump)
+ val r = getFileReader(dump, readFully)
var line = r.readLine
while (line != null) {
} else {
val fields = input.head.split(",").toList
info("dumping fields: %s".format(fields))
- val r = getFileReader(dump)
+ val r = getFileReader(dump, readFully)
var line = r.readLine
while (line != null) {
val trimmedLine = line.trim
if (trimmedLine.length > 0)
try {
- val json = jsonParser.parse(trimmedLine)
- json match {
- case j: JSONArray if (fields.contains("@array")) =>
- println(j.toArray.mkString(settings(SEPARATOR)))
- case j: JSONObject =>
- println(fields.flatMap(f => value(j, f.split("\\."))).map(_.replaceAll("[\\n\\r]+", " "))
- .mkString(settings(SEPARATOR)))
- case j: Object =>
- info("Only found primitive type in json data: %s".format(j))
- }
+ println(values(jsonParser.parse(trimmedLine), fields, settings(SEPARATOR)))
} catch {
case e: Exception => debug("line can't be parsed: %s".format(e.getMessage))
}
}
}
- private def value(o: Object, keys: Seq[String]): Option[String] = {
+ private def values(json: Object, fields: Seq[String], sep: String): String = {
+ json match {
+ case j: JSONArray =>
+ j.toArray.map(values(_, fields, sep)).mkString("\n")
+ case j: JSONObject =>
+ fields.flatMap(f => value(j, f.split("\\."), sep))/*.map(_.replaceAll("[\\n\\r]+", " "))*/.mkString(sep)
+ case j: Object =>
+ info("Only found primitive type in json data: %s".format(j))
+ ""
+ }
+ }
+
+ private def value(o: Object, keys: Seq[String], sep: String): Option[String] = {
if (keys.length > 1 && o.isInstanceOf[JSONObject])
- value(o.asInstanceOf[JSONObject].get(keys.head), keys.drop(1))
+ value(o.asInstanceOf[JSONObject].get(keys.head), keys.drop(1), sep)
else {
val key = keys.head
o match {
- case j: JSONArray if (key == "@array") =>
- error("%s: sub-arrays not supported".format(key))
- None
+ case j: JSONArray if (key.startsWith("@array(")) =>
+ Some(values(j, key.substring(7, key.length - 1).split(";").map(_.trim), sep))
+// error("%s: sub-arrays not supported".format(key))
+// None
case j: JSONObject =>
- Some(j.get(key).toString)
+ Some(quote(j.get(key)))
case j: Object =>
- Some(j.toString)
+ Some(quote(j))
}
}
}
+
+ private def quote(o: Any): String = if(o.isInstanceOf[String]) "\"%s\"".format(o.toString.replaceAll("\"", "\"\"")) else o.toString
}