SQL for NC_PARAPHRASE_PREPOSITIONAL_RIGHT_4a


SELECT lp.prep_low,
       COUNT(*) AS freq 
FROM (
SELECT prep_word.word_lower prep_low
FROM biotext_annotation sent
JOIN biotext_annotation word2 ON
   sent.pmid = word2.pmid AND
   sent.section = word2.section AND
   sent.sentence = word2.sentence AND
   sent.first_word_pos <= word2.first_word_pos
JOIN biotext_annotation word3 ON
   sent.pmid = word3.pmid AND
   sent.section = word3.section AND
   sent.sentence = word3.sentence AND
   word2.last_word_pos = word3.first_word_pos
JOIN biotext_annotation prep ON
   sent.pmid = prep.pmid AND
   sent.section = prep.section AND
   sent.sentence = prep.sentence AND
   word3.last_word_pos = prep.first_word_pos
JOIN biotext_annotation word1 ON
   sent.pmid = word1.pmid AND
   sent.section = word1.section AND
   sent.sentence = word1.sentence AND
   prep.last_word_pos = word1.first_word_pos AND
   word1.last_word_pos <= sent.last_word_pos
JOIN biotext_annotation_word prep_word on
   prep.word_id = prep_word.word_id
WHERE sent.layer_id IN (4) 
  AND word2.layer_id IN (1) 
  AND word2.tag_type IN (27, 28, 29, 30) 
  AND word2.word_id IN (217291, 48437, 49764, 1248998) 
  AND word3.layer_id IN (1) 
  AND word3.tag_type IN (27, 28, 29, 30) 
  AND word3.word_id IN (43556, 45714, 1325075, 242403, 43610, 45717) 
  AND prep.layer_id IN (1) 
  AND prep.tag_type IN (19) 
  AND prep.word_id IN (646264, 74774, 76413, 76431, 152964, 44946, 46500, 1026156, 1585795, 54994, 58066, 58593, 1228005,
                               17300, 20372, 20458, 1489767, 74990, 76565, 76620, 1832548, 823731, 45381, 46712, 1401882,
			       324236, 108791, 109302, 12961, 18102, 1349781, 1408822, 61203, 62887, 1426930) 
  AND word1.layer_id IN (1) 
  AND word1.tag_type IN (27, 28, 29, 30) 
  AND word1.word_id IN (70483, 73473, 1541807, 183393, 70488)

   UNION ALL

SELECT prep_word.word_lower prep_low
FROM biotext_annotation sent
JOIN biotext_annotation word2 ON
   sent.pmid = word2.pmid AND
   sent.section = word2.section AND
   sent.sentence = word2.sentence AND
   sent.first_word_pos <= word2.first_word_pos
JOIN biotext_annotation word3 ON
   sent.pmid = word3.pmid AND
   sent.section = word3.section AND
   sent.sentence = word3.sentence AND
   word2.last_word_pos = word3.first_word_pos
JOIN biotext_annotation prep ON
   sent.pmid = prep.pmid AND
   sent.section = prep.section AND
   sent.sentence = prep.sentence AND
   word3.last_word_pos = prep.first_word_pos
JOIN biotext_annotation det ON
   sent.pmid = det.pmid AND
   sent.section = det.section AND
   sent.sentence = det.sentence AND
   prep.last_word_pos = det.first_word_pos
JOIN biotext_annotation word1 ON
   sent.pmid = word1.pmid AND
   sent.section = word1.section AND
   sent.sentence = word1.sentence AND
   det.last_word_pos = word1.first_word_pos AND
   word1.last_word_pos <= sent.last_word_pos
JOIN biotext_annotation_word prep_word on
   prep.word_id = prep_word.word_id
WHERE sent.layer_id IN (4) 
  AND word2.layer_id IN (1) 
  AND word2.tag_type IN (27, 28, 29, 30) 
  AND word2.word_id IN (217291, 48437, 49764, 1248998) 
  AND word3.layer_id IN (1) 
  AND word3.tag_type IN (27, 28, 29, 30) 
  AND word3.word_id IN (43556, 45714, 1325075, 242403, 43610, 45717) 
  AND prep.layer_id IN (1) 
  AND prep.tag_type IN (19) 
  AND prep.word_id IN (646264, 74774, 76413, 76431, 152964, 44946, 46500, 1026156, 1585795, 54994, 58066, 58593, 1228005,
                               17300, 20372, 20458, 1489767, 74990, 76565, 76620, 1832548, 823731, 45381, 46712, 1401882,
			       324236, 108791, 109302, 12961, 18102, 1349781, 1408822, 61203, 62887, 1426930) 
  AND det.layer_id IN (1) 
  AND det.tag_type IN (15) 
  AND det.word_id IN (213391, 1595216, 1625502, 100564, 103713, 103924, 12848, 17852, 317290, 15157, 19411, 19749) 
  AND word1.layer_id IN (1) 
  AND word1.tag_type IN (27, 28, 29, 30) 
  AND word1.word_id IN (70483, 73473, 1541807, 183393, 70488)
) AS lp
GROUP BY lp.prep_low
ORDER BY freq DESC
;