commit ad1e9cfad75fabce65f5394e84f63d6f5c5b7a93 from: Matthias L. Jugel date: Thu Mar 8 19:45:27 2012 UTC added extracting arrays commit - 56f7be9a447392598b27296e90f5ba238f2a1854 commit + ad1e9cfad75fabce65f5394e84f63d6f5c5b7a93 blob - c2cf4c97f8cdd1dce821545b00610cc60446438e blob + ab7eeb7aadac5994cbc89655916a2db9543d7f1c --- src/main/scala/twimpact/jsonv/Main.scala +++ src/main/scala/twimpact/jsonv/Main.scala @@ -10,6 +10,7 @@ import net.minidev.json.parser.JSONParser import net.minidev.json.{JSONArray, JSONObject} import java.io._ import java.util.zip.GZIPInputStream +import io.Source /** * >>Describe Class<< @@ -23,7 +24,7 @@ object Main extends Logging { private def usage(message: Option[String] = None) { message.foreach(error(_)) error("usage: jsonv [-i] ") - error(" jvonv [-csv|-tsv] ") + error(" jvonv [-csv|-tsv] [-f] ") error("") error("Start by looking up the fields info from the dump using -i.") error("Then dump your information in CSV or TSV format by providing the") @@ -31,13 +32,19 @@ object Main extends Logging { System.exit(0) } - private def getFileReader(dump: File): BufferedReader = { - dump match { + private def getFileReader(dump: File, readFully: Boolean): BufferedReader = { + val is = dump match { case f if (f.getName.endsWith("gz")) => - new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(f)))) + new GZIPInputStream(new FileInputStream(f)) case f => - new BufferedReader(new InputStreamReader(new FileInputStream(f))) + new FileInputStream(f) } + if(readFully) { + new BufferedReader(new StringReader(Source.fromInputStream(is).mkString("").replaceAll("[\r\n]+", ""))) + } else { + new BufferedReader(new InputStreamReader(is)) + } + } val SEPARATOR = "sep" @@ -46,6 +53,7 @@ object Main extends Logging { info("jsonv - (c) 2012 Matthias L. Jugel") if (args.length == 0) usage() var printInfo = false + var readFully = false var settings: Map[String, String] = Map(SEPARATOR -> "\t") var input: List[String] = Nil args.foreach { @@ -56,6 +64,8 @@ object Main extends Logging { settings += (SEPARATOR -> ";") case "-tsv" => settings += (SEPARATOR -> "\t") + case "-f" => + readFully = true case i => input = i :: input } @@ -73,7 +83,7 @@ object Main extends Logging { if (printInfo) { info("Dumping field information from %s".format(dump)) - val r = getFileReader(dump) + val r = getFileReader(dump, readFully) var line = r.readLine while (line != null) { @@ -105,22 +115,13 @@ object Main extends Logging { } else { val fields = input.head.split(",").toList info("dumping fields: %s".format(fields)) - val r = getFileReader(dump) + val r = getFileReader(dump, readFully) var line = r.readLine while (line != null) { val trimmedLine = line.trim if (trimmedLine.length > 0) try { - val json = jsonParser.parse(trimmedLine) - json match { - case j: JSONArray if (fields.contains("@array")) => - println(j.toArray.mkString(settings(SEPARATOR))) - case j: JSONObject => - println(fields.flatMap(f => value(j, f.split("\\."))).map(_.replaceAll("[\\n\\r]+", " ")) - .mkString(settings(SEPARATOR))) - case j: Object => - info("Only found primitive type in json data: %s".format(j)) - } + println(values(jsonParser.parse(trimmedLine), fields, settings(SEPARATOR))) } catch { case e: Exception => debug("line can't be parsed: %s".format(e.getMessage)) } @@ -129,20 +130,35 @@ object Main extends Logging { } } - private def value(o: Object, keys: Seq[String]): Option[String] = { + private def values(json: Object, fields: Seq[String], sep: String): String = { + json match { + case j: JSONArray => + j.toArray.map(values(_, fields, sep)).mkString("\n") + case j: JSONObject => + fields.flatMap(f => value(j, f.split("\\."), sep))/*.map(_.replaceAll("[\\n\\r]+", " "))*/.mkString(sep) + case j: Object => + info("Only found primitive type in json data: %s".format(j)) + "" + } + } + + private def value(o: Object, keys: Seq[String], sep: String): Option[String] = { if (keys.length > 1 && o.isInstanceOf[JSONObject]) - value(o.asInstanceOf[JSONObject].get(keys.head), keys.drop(1)) + value(o.asInstanceOf[JSONObject].get(keys.head), keys.drop(1), sep) else { val key = keys.head o match { - case j: JSONArray if (key == "@array") => - error("%s: sub-arrays not supported".format(key)) - None + case j: JSONArray if (key.startsWith("@array(")) => + Some(values(j, key.substring(7, key.length - 1).split(";").map(_.trim), sep)) +// error("%s: sub-arrays not supported".format(key)) +// None case j: JSONObject => - Some(j.get(key).toString) + Some(quote(j.get(key))) case j: Object => - Some(j.toString) + Some(quote(j)) } } } + + private def quote(o: Any): String = if(o.isInstanceOf[String]) "\"%s\"".format(o.toString.replaceAll("\"", "\"\"")) else o.toString }