SQL for the Simplified Proten-Verb-Protein Pattern


SELECT lql.p1, lql.verb, lql.p2, COUNT(*) AS cnt
FROM (
   SELECT p1.content AS p1, verb_biotext_annotation_word_1.word AS verb, p2.content AS p2
FROM BIOTEXT_ANNOTATION table_1
JOIN BIOTEXT_ANNOTATION table_2 ON
   table_1.pmid = table_2.pmid AND
   table_1.section = table_2.section AND
   table_1.sentence = table_2.sentence AND
   table_1.first_word_pos <= table_2.first_word_pos
JOIN BIOTEXT_ANNOTATION p1 ON
   table_1.pmid = p1.pmid AND
   table_1.section = p1.section AND
   table_1.sentence = p1.sentence AND
   table_2.first_word_pos <= p1.first_word_pos AND
   table_2.last_word_pos = p1.last_word_pos
JOIN BIOTEXT_ANNOTATION verb ON
   table_1.pmid = verb.pmid AND
   table_1.section = verb.section AND
   table_1.sentence = verb.sentence AND
   table_2.last_word_pos = verb.first_word_pos
JOIN BIOTEXT_ANNOTATION table_3 ON
   table_1.pmid = table_3.pmid AND
   table_1.section = table_3.section AND
   table_1.sentence = table_3.sentence AND
   verb.last_word_pos = table_3.first_word_pos AND
   table_1.last_word_pos >= table_3.last_word_pos
JOIN BIOTEXT_ANNOTATION p2 ON
   table_1.pmid = p2.pmid AND
   table_1.section = p2.section AND
   table_1.sentence = p2.sentence AND
   table_3.first_word_pos <= p2.first_word_pos AND
   table_3.last_word_pos = p2.last_word_pos
JOIN biotext_annotation_word verb_biotext_annotation_word_1 ON verb.word_id = verb_biotext_annotation_word_1.word_id
WHERE table_1.layer_id = 4
   AND (table_2.layer_id = 3 AND table_2.tag_type IN (31, 1031))
   AND p1.layer_id = 5
   AND ((verb.layer_id = 1 AND verb.tag_type IN (53, 54, 55, 56, 57, 58, 1053, 1054, 1055, 1056, 1057, 1058)) AND ((verb.word_id IN (13438, 1401267, 1704480, 1837370, 1368480, 1395664, 13439, 18300, 1430513, 685368, 1741812, 1701778, 1340645, 1631134, 1132586, 291546, 1822280, 173301, 13440, 776601, 1560108, 1625212, 1740820, 1385307, 841475, 200102, 497953, 950554, 711830, 799005, 1516888, 641725, 1054225, 1159279, 1809935, 501904, 733023, 1159280, 13441, 1352215, 1680196, 1711807, 1791082, 778210, 1041124, 1567429, 315488, 632663, 868369, 903049, 1819793, 159279, 13442, 1137580, 1429059, 1490034) OR verb.word_id IN (55584, 1287422, 492858, 1333491, 642430, 486130, 1597803, 1733772, 1427784, 132755, 1497343, 821336, 55585, 578153, 310310, 375787, 790326, 671810, 215852, 55586, 1380966, 1411468, 1718409, 1536878, 1348101, 535532, 1493083, 203048, 912589, 471713, 990054, 664554, 1671707, 150639, 1291320, 55587, 58238, 1035152, 1163393, 1734845, 1023721, 55588, 55589, 58239, 1334490, 478798, 1520510, 568192, 595670, 1599821, 876702, 1633958, 1212944, 126964, 55590, 1506222, 750803, 214950, 1319808, 1628470, 1263661, 990126, 1505075, 282531, 473463, 1108028, 494442, 857399, 1157753, 1591570, 909932, 784684, 193272, 657898, 1179671, 1707728, 1050449, 1584710, 1436334, 1252251, 595207, 1601729, 854770, 977536, 931825, 547824, 1074573, 942961, 1793275, 1658831, 548689, 1259031, 954718, 510792, 1201275, 961955, 702888, 1783230, 811807, 1446418, 1174529, 207405, 448593, 157985, 1405148, 208875, 55591, 619167, 1760162, 662163, 55592, 306832, 581599, 452670, 986341, 1710550, 55593, 58240, 1401551, 816838, 1270702, 551741, 722957, 567135, 674865, 685052, 1604253, 841845, 518478, 395451, 630926, 657063, 1303137, 1373157, 507408, 200073, 160650, 996238, 337383, 863361, 1423886, 1514962, 897583, 615271, 437767, 1405224, 1345411, 270299, 55594, 692430, 1500019, 1477162, 747090, 1821325, 209870, 235038, 1150235, 1783236, 1664611, 1656543, 1159514, 936014, 55595, 495229, 1583521, 1033259, 667000, 55596, 1051530, 1280257, 521652, 1586983, 690598, 55597, 55598, 394126, 251005, 1072743, 415383, 364449, 171679, 1197022, 200471, 1681865, 260371, 1260331, 540730, 666998, 55599, 516739, 1703355, 1381580, 142653, 1043462, 1719824, 704262, 1324493, 533465, 283057, 1381387, 55600, 188506, 1376806, 339321, 511925, 714474, 1268297, 1012299, 244765, 1472247, 231865, 640443, 573710, 860760, 140183, 651563, 1353203, 1169091, 884413, 1351164, 55601, 1423641, 871656, 348016, 1811386, 533472, 713096, 1696806, 1791143, 1819254, 1576624, 1517413, 1639599, 1282394, 1272832, 439035, 1519194, 1362450, 822203, 339630, 684582, 947095, 335100, 1200904, 506617, 1320881, 921176, 595022, 1812300, 1262839, 913020, 645981, 1324646, 55602, 533130, 450410, 55603, 121630, 156752, 728684, 1650177, 55604, 614813, 329415, 1494435, 1238025, 1626748, 1344347, 277029, 1661287, 348007, 348012, 1350365, 1246856, 1354500, 869202, 1715546, 157804, 1603459, 799516, 932977, 1551689, 616847, 1831160, 412207, 1269542, 1777594, 1771073, 1587823, 466499, 1611217, 1137995, 339147, 1044812, 1749421, 1359472, 698317, 1536017, 1810369, 724155, 1817612, 1542069, 1649689, 566343, 1508531, 569093, 1229530, 1182181, 973576, 352179, 1459235, 1080236, 1372659, 1372643, 1227961, 1147943, 953032, 1500789, 1577613, 580981, 598095, 1595577, 1534579, 55605, 1400880, 705765, 698319, 447040, 634358, 647728, 133185, 1119629, 1119630, 329310, 965946, 1599421, 385745, 55606, 58241, 1230750, 1034268, 796983, 414606, 1480713, 1270685, 1187999, 299165, 1686658, 434864, 1188003, 574176, 1608622, 1508500, 1493004, 223167, 1047762, 390250, 1565258, 1289396, 1027141, 1047061, 1257872, 558774, 883617, 1276736, 1218178, 634199, 1101766, 1270665, 1402042, 717481, 1327303, 1043559, 1652923, 462273, 1082528, 1362746, 1257856, 1807734, 461443, 928557, 1239560, 1760737, 1188663, 1521031, 1699442, 1257854, 577581, 1024236, 1457002, 1598771, 1337792, 1298667, 1841244, 1416595, 1134661, 1823263, 149817, 819182, 1205140, 1108619, 399451, 1727186, 1272754, 1835327, 1835326, 1780734, 718271, 726222, 825546, 437910, 860488, 1782468, 523773, 132785, 1783226, 1218551, 1804881, 476254, 705763, 899443, 1171042, 1171043, 55607, 58242, 1762689, 1762690, 1442800, 1732557, 1214162, 1037602, 1527214, 258358, 1272523, 1546678, 605837, 1154285, 277231, 795836, 1278784, 227197, 1436298, 641265, 700945, 398101, 1497603, 1498217, 1840852, 798808, 1684339, 1453720, 55608, 1733409, 706589)) OR verb.word_id IN (21770, 1233710, 1483592, 1401661, 1535086, 1142508, 679829, 948586, 1582423, 1450050, 518585, 733807, 378597, 378602, 760782, 238839, 569910, 21771, 23815, 1540144, 989402, 235144, 834922, 21772, 1567213, 1127825, 1065840, 985146, 1671071, 436071, 995016, 1406960, 21773, 23816, 1016699, 1713082, 981970, 1770408, 1027133, 558711, 664423, 871795, 1032934, 1618581, 168161, 1760534, 955132, 1511109, 1134383, 466313, 738823, 597883, 1179279, 932540, 948627, 1121387, 520216, 1530858, 1774364, 1567361, 882914, 632401, 23817, 663194, 168158, 949540, 318661, 21774, 849928, 789007, 1021125, 1693902, 891149, 1776453, 345505, 1261646, 594291, 747115, 1798194, 1803828, 704909, 1196665, 963827, 1785072, 355065, 226259, 277045, 719216, 1079151, 602348, 1497429, 744606, 1093410, 844539, 668555, 1444975, 1290892, 1607269, 1432769, 1248887, 783593, 755975, 1546218, 1382512, 466323, 348319, 982706, 801473, 1382831, 1171036, 366356, 526625, 402707, 256037, 1371560, 578379, 1755995, 1768850, 1629153, 21775, 1127242, 610082, 547061, 1306546, 21776, 1273158, 481147, 802521, 941325, 1273641, 1806321, 1790377, 1417570, 379296, 373286, 1134896, 1359384, 1540121, 890488, 1551510, 155910, 800572, 933655, 580781, 556797, 1226575, 1353498, 1107314, 1117420, 331949, 808921, 1769503, 1563026, 396522, 396507, 1611911, 821360, 834858, 990094, 1163966, 556292, 850497, 672162, 1498082, 1606648, 745906, 1655600, 1612870, 374885, 21777, 1653013, 21778, 1268550, 504579, 1284880, 979636, 609234, 800573, 1274910, 214889, 672424, 21779, 805561, 827506, 1192914, 952055, 1220302, 782844, 782838, 1313241, 21780, 1010958, 604825, 861579, 1395381, 837303, 747530, 1077965, 354557, 235585, 21781, 595566, 21782, 1751683, 1436040, 1313272, 1041639, 1767576, 1767572, 370145, 370146)))
   AND (table_3.layer_id = 3 AND table_3.tag_type IN (31, 1031))
   AND p2.layer_id = 5
) AS lql
GROUP BY lql.p1, lql.verb, lql.p2
ORDER BY cnt DESC