SQL for NC_PARAPHRASE_PREPOSITIONAL_RIGHT_4


SELECT prep_word.word_lower AS prep_low,
       COUNT(*) AS freq 
FROM biotext_annotation sent
JOIN biotext_annotation word2 ON
   sent.pmid = word2.pmid AND
   sent.section = word2.section AND
   sent.sentence = word2.sentence AND
   sent.first_word_pos <= word2.first_word_pos
JOIN biotext_annotation word3 ON
   sent.pmid = word3.pmid AND
   sent.section = word3.section AND
   sent.sentence = word3.sentence AND
   word2.last_word_pos = word3.first_word_pos
JOIN biotext_annotation prep ON
   sent.pmid = prep.pmid AND
   sent.section = prep.section AND
   sent.sentence = prep.sentence AND
   word3.last_word_pos = prep.first_word_pos
LEFT OUTER JOIN biotext_annotation det ON
   sent.pmid = det.pmid AND
   sent.section = det.section AND
   sent.sentence = det.sentence AND
   prep.last_word_pos = det.first_word_pos AND 
   det.layer_id = 1 AND 
   det.tag_type = 15 AND 
   det.word_id IN (213391, 1595216, 1625502, 100564, 103713, 103924, 12848, 17852, 317290, 15157, 19411, 19749) 
JOIN biotext_annotation word1 ON
   sent.pmid = word1.pmid AND
   sent.section = word1.section AND
   sent.sentence = word1.sentence AND
   COALESCE(det.last_word_pos,prep.last_word_pos) = word1.first_word_pos AND
   word1.last_word_pos <= sent.last_word_pos
JOIN biotext_annotation_word prep_word on
   prep.word_id = prep_word.word_id
WHERE sent.layer_id = 4
  AND word2.layer_id = 1
  AND word2.tag_type IN (27, 28, 29, 30) 
  AND word2.word_id IN (217291, 48437, 49764, 1248998) 
  AND word3.layer_id = 1
  AND word3.tag_type IN (27, 28, 29, 30) 
  AND word3.word_id IN (43556, 45714, 1325075, 242403, 43610, 45717) 
  AND prep.layer_id = 1
  AND prep.tag_type = 19
  AND prep.word_id IN (646264, 74774, 76413, 76431, 152964, 44946, 46500, 1026156, 1585795, 54994, 58066, 58593, 1228005,
                               17300, 20372, 20458, 1489767, 74990, 76565, 76620, 1832548, 823731, 45381, 46712, 1401882,
			       324236, 108791, 109302, 12961, 18102, 1349781, 1408822, 61203, 62887, 1426930) 
  AND word1.layer_id = 1
  AND word1.tag_type IN (27, 28, 29, 30) 
  AND word1.word_id IN (70483, 73473, 1541807, 183393, 70488)
GROUP BY prep_low
ORDER BY freq DESC
;