Edit History of Stack Overflow Threads (Google Big Query)
Create table with edit and comment history of posts:
#standardsql
SELECT *
FROM (
SELECT
pv.PostId as PostId,
pv.PostTypeId as PostTypeId,
PostHistoryId as EventId,
CASE
WHEN pv.PostHistoryTypeId=2 THEN "InitialBody"
ELSE "BodyEdit"
END as Event,
UserId,
pv.CreationDate AS CreationDate
FROM `sotorrent-org.2018_06_17.PostVersion` pv
JOIN `sotorrent-org.2018_06_17.PostHistory` ph
ON pv.PostHistoryId = ph.Id
UNION ALL
SELECT
tv.PostId as PostId,
tv.PostTypeId as PostTypeId,
PostHistoryId as EventId,
CASE
WHEN tv.PostHistoryTypeId=1 THEN "InitialTitle"
ELSE "TitleEdit"
END as Event,
UserId,
tv.CreationDate as CreationDate
FROM `sotorrent-org.2018_06_17.TitleVersion` tv
JOIN `sotorrent-org.2018_06_17.PostHistory` ph
ON tv.PostHistoryId = ph.Id
UNION ALL
SELECT
PostId,
PostTypeId,
c.Id as EventId,
"Comment" AS Event,
UserId,
c.CreationDate as CreationDate
FROM `sotorrent-org.2018_06_17.Comments` c
JOIN `sotorrent-org.2018_06_17.Posts` p
ON c.PostId = p.Id
);
Store the result of this query in a new table <YOUR_DATASET>.EditHistory
.
Create a helper table that allows to use the same query to retrieve the edit history of a thread using the PostId
of a question or an answer:
#standardsql
SELECT
Id as PostId,
PostTypeId,
CASE
WHEN PostTypeId=1 THEN Id
WHEN PostTypeId=2 THEN ParentId
END as ParentId
FROM `sotorrent-org.2018_06_17.Posts`
WHERE PostTypeId=1 # only consider questions and answers
OR PostTypeId=2;
Store the result of this query in a new table <YOUR_DATASET>.Threads
.
Retrieve edit history of a thread:
#standardsql
SELECT *
FROM `sotorrent-org.2018_06_17.EditHistory`
WHERE PostId IN (
SELECT PostId
FROM `<YOUR_DATASET>.Threads`
WHERE ParentId = (
SELECT ParentID
FROM `<YOUR_DATASET>.Threads`
WHERE PostId=3758880 # this is an answer id, the question id 3758606 yields the same result
)
)
ORDER BY CreationDate;