Cuffdiff mining by MySQL

Schema file for isoform_exp.diff from cuffdiff
isoformdiff.sql

DROP TABLE IF EXISTS `isoformdiff`;
SET @saved_cs_client     = @@character_set_client;
SET character_set_client = utf8;
CREATE TABLE `isoformdiff` (
  `diff_id` varchar(31) NOT NULL,
  `test_id` varchar(31) NOT NULL,
  `gene_id` varchar(31) NOT NULL,
  `gene` varchar(31) NOT NULL,
  `locus` varchar(31) NOT NULL,
  `sample_1` varchar(31) NOT NULL,
  `sample_2` varchar(31) NOT NULL,
  `status` varchar(10) NOT NULL,
  `value_1` float NOT NULL,
  `value_2` float NOT NULL,
  `log2_foldchange` float NOT NULL,
  `test_stat` varchar(10) NOT NULL,
  `p_value` float NOT NULL,
  `q_value` float NOT NULL,
  `significant` varchar(5) NOT NULL,
   KEY `test_id` (`test_id`),
   KEY `locus` (`locus`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
SET character_set_client = @saved_cs_client;

Generate db and populates

mysql -u suk mm9 < isoformdiff.sql
mawk 'BEGIN {FS="\t"};{print "hippo_vs_heart\t"$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$10"\t"$11"\t"$12"\t"$13"\t"$14}' gene_exp.diff |mysql -u suk mm9 --local-infile=1  -e 'LOAD DATA LOCAL INFILE "/dev/stdin" INTO TABLE isoformdiff FIELDS TERMINATED BY "\t";'

Some query

SELECT isoformdiff.diff_id, isoformdiff.test_id, isoformdiff.log2_foldchange, isoformdiff.q_value, GROUP_CONCAT(uniprotTogo.go),COALESCE(sp.description, kg.description, re.description)
FROM isoformdiff
LEFT JOIN kgXref as sp on isoformdiff.test_id = sp.spID
LEFT JOIN kgXref as kg on isoformdiff.test_id = kg.kgID
LEFT JOIN kgXref as re on isoformdiff.test_id = re.protAcc
INNER JOIN uniprotTogo on sp.spID = uniprotTogo.uniprot
WHERE 
isoformdiff.significant="yes" AND 
isoformdiff.q_value  1 OR isoformdiff.value_2 >1) 
GROUP BY isoformdiff.test_id;

Count by GO accessions

SELECT uniprotTogo.go, COUNT(uniprotTogo.go), t.name
FROM isoformdiff
LEFT JOIN kgXref as sp on isoformdiff.test_id = sp.spID
LEFT JOIN kgXref as kg on isoformdiff.test_id = kg.kgID
LEFT JOIN kgXref as re on isoformdiff.test_id = re.protAcc
INNER JOIN uniprotTogo on sp.spID = uniprotTogo.uniprot
INNER JOIN mygo.term as t on t.acc = uniprotTogo.go
WHERE 
isoformdiff.significant="yes" AND 
isoformdiff.q_value  1 OR isoformdiff.value_2 >1) 
GROUP BY uniprotTogo.go
ORDER BY COUNT(uniprotTogo.go);

Yes,I know my sql queries are suck😛 Anyway, they are just for my reminders…LOL

One thought on “Cuffdiff mining by MySQL

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s