fix comments migration; remove unused columns

This commit is contained in:
knst-kotov 2021-12-08 10:28:38 +03:00
parent ea8fffc25d
commit 1c38d51113
3 changed files with 96 additions and 120 deletions

View File

@ -9,6 +9,7 @@ from migration.tables.content_items import get_metadata, migrate as migrateShout
from migration.tables.content_item_categories import migrate as migrateCategory
from migration.tables.tags import migrate as migrateTag
from migration.tables.comments import migrate as migrateComment
from migration.tables.comments import migrate_2stage as migrateComment_2stage
from migration.utils import DateTimeEncoder
from orm import Community, Topic
from dateutil.parser import parse as date_parse
@ -206,41 +207,16 @@ def export_slug(slug, export_articles, export_authors, content_dict):
export_body(shout, content_dict)
comments([slug, ])
def comments(sluglist, export_comments, export_articles, shouts_by_slug, content_dict):
''' migrating comments on content items one '''
# NOTE(review): indentation was lost in this view, so the nesting of the
# statements below cannot be read off the text - confirm against the real file.
# NOTE(review): a second `def comments(comments_data)` follows in this view;
# if both live in one module, the later definition replaces this one.
# Exports migrated comments into export_comments, keyed by shout slug.
if len(sluglist) == 0:
# Rebinds the export_articles parameter with the contents of
# ../src/data/articles.json; the handle from open() is never explicitly closed.
export_articles = json.loads(open('../src/data/articles.json').read())
print(str(len(export_articles.items())) + ' articles were exported before')
# With no slugs requested, every key of export_articles is used as a slug.
if len(sluglist) == 0: sluglist = list(export_articles.keys())
if len(sluglist) > 0:
print('exporting comments for: ')
print(' '.join(sluglist))
for slug in sluglist:
shout = shouts_by_slug[slug]
old_id = shout['old_id']
content_item = content_dict.get(old_id, {})
# Only content items with a truthy 'commentedAt' are looked at.
if content_item.get('commentedAt', False):
# comments_by_post is neither a parameter nor a local here - presumably a
# module-level mapping of legacy post id -> raw comments; verify.
comments = [ migrateComment(c) for c in comments_by_post.get(old_id, []) ]
if len(comments) > 0:
export_comments[slug] = comments
sys.stdout.write('.')
else:
print('exporting comments for top 10 commented articles...')
comments_by_shoutslug = {}
# content_data and shouts_by_oid are likewise not defined in this function -
# presumably module-level globals; verify.
for content_item in content_data:
old_id = content_item['_id']
if content_item.get('commentedAt', False):
comments = [ migrateComment(c) for c in comments_by_post.get(old_id, []) ]
if len(comments) > 0:
# Comments whose post has no entry in shouts_by_oid are filed under the
# 'abandoned-comments' slug.
shout = shouts_by_oid.get(old_id, { 'slug': 'abandoned-comments' })
comments_by_shoutslug[shout['slug']] = comments
# Keep the 10 slugs with the longest comment lists (sorted descending by length).
top = dict(sorted(comments_by_shoutslug.items(), reverse=True, key=lambda c: len(c[1]))[:10])
export_comments.update(top)
print(str(len(export_comments.keys())) + ' articls with comments exported\n')
def comments(comments_data):
    """Migrate all legacy comments, then link replies to their parents.

    Works in two passes over ``comments_data``:

    1. ``migrateComment`` is called for every entry and the ``old_id`` ->
       ``id`` pair of the dict it returns is collected into a map.
    2. ``migrateComment_2stage`` is called for every entry together with the
       complete map, so a reply can be linked even when its parent appears
       later in ``comments_data``.

    Args:
        comments_data: legacy comment entries. The collection is traversed
            twice, so it must be re-iterable (e.g. a list).

    Returns:
        None. The number of collected id pairs is printed.
    """
    id_map = {}
    for entry in comments_data:
        migrated = migrateComment(entry)
        id_map[migrated.get('old_id')] = migrated.get('id')
    # Second pass only after the map is complete.
    for entry in comments_data:
        migrateComment_2stage(entry, id_map)
    print(str(len(id_map)) + ' comments exported')
def export_finish(export_articles = {}, export_authors = {}, export_topics = {}, export_comments = {}):
@ -342,16 +318,14 @@ if __name__ == '__main__':
elif cmd == "shouts":
shouts(content_data, shouts_by_slug, shouts_by_oid) # NOTE: listens limit
elif cmd == "comments":
for comment in comments_data:
migrateComment(comment)
comments(comments_data)
elif cmd == "export_shouts":
export_shouts(shouts_by_slug, export_articles, export_authors, content_dict)
elif cmd == "all":
users(users_by_oid, users_by_slug, users_data)
topics(export_topics, topics_by_slug, topics_by_cat, topics_by_tag, cats_data, tags_data)
shouts(content_data, shouts_by_slug, shouts_by_oid)
for comment in comments_data:
migrateComment(comment)
comments(comments_data)
elif cmd == 'slug':
export_slug(sys.argv[2], export_articles, export_authors, content_dict)
#export_finish(export_articles, export_authors, export_topics, export_comments)

View File

@ -7,83 +7,88 @@ from orm.base import local_session
from migration.html2text import html2text
def migrate(entry):
    '''
    Create a Comment (and its CommentRating rows) from one legacy entry.

    Legacy entry:

    {
      "_id": "hdtwS8fSyFLxXCgSC",
      "body": "<p>",
      "contentItem": "mnK8KsJHPRi8DrybQ",
      "createdBy": "bMFPuyNg6qAD2mhXe",
      "thread": "01/",
      "createdAt": "2016-04-19 04:33:53+00:00",
      "ratings": [
        { "createdBy": "AqmRukvRiExNpAe8C", "value": 1 },
        { "createdBy": "YdE76Wth3yqymKEu5", "value": 1 }
      ],
      "rating": 2,
      "updatedAt": "2020-05-27 19:22:57.091000+00:00",
      "updatedBy": "0"
    }

    ->

    type Comment {
      id: Int!
      author: Int!
      body: String!
      replyTo: Int!
      createdAt: DateTime!
      updatedAt: DateTime
      shout: Int!
      deletedAt: DateTime
      deletedBy: Int
      ratings: [CommentRating]
      views: Int
    }

    Returns the dict of values passed to Comment.create, extended with
    'id' (the new comment id), 'ratings' (dicts of the ratings that were
    created) and 'old_id' (the legacy '_id').

    Raises RuntimeError when there is no Shout to attach the comment to.
    An exception from CommentRating.create is re-raised after the offending
    rating dict has been printed.
    '''
    with local_session() as session:
        # Attach to the shout whose old_id matches the legacy content item;
        # when there is none, fall back to the first shout in the table.
        shout = session.query(Shout).filter(Shout.old_id == entry['contentItem']).first()
        if not shout:
            shout = session.query(Shout).first()
        if not shout:
            # Without this check the failure is an AttributeError on None.id.
            raise RuntimeError('no shout to attach comment ' + str(entry['_id']) + ' to')
        author = session.query(User).filter(User.old_id == entry['createdBy']).first()
        comment_dict = {
            'author': author.id if author else 0,  # 0 when the author is unknown
            'createdAt': date_parse(entry['createdAt']),
            'body': html2text(entry['body']),
            'shout': shout.id,
        }
        if entry.get('deleted'):
            # Deletion time and author are taken from the last-update fields.
            # NOTE(review): deletedBy is stored as the legacy id string while
            # the schema above says Int - confirm.
            comment_dict['deletedAt'] = date_parse(entry['updatedAt'])
            comment_dict['deletedBy'] = str(entry['updatedBy'])
        if entry.get('updatedAt'):
            # updatedBy is not stored: it is an invalid keyword for Comment.
            comment_dict['updatedAt'] = date_parse(entry['updatedAt'])
        comment = Comment.create(**comment_dict)
        # The keys below are added after Comment.create, so they are part of
        # the returned dict only and are not passed to the model.
        comment_dict['id'] = comment.id
        comment_dict['ratings'] = []
        comment_dict['old_id'] = entry['_id']
        for comment_rating_old in entry.get('ratings', []):
            rater = session.query(User).filter(User.old_id == comment_rating_old['createdBy']).first()
            if not (rater and comment):
                # Ratings from users that were not migrated are skipped.
                continue
            comment_rating_dict = {
                'value': comment_rating_old['value'],
                'createdBy': rater.id,
                'comment_id': comment.id,
            }
            cts = comment_rating_old.get('createdAt')
            if cts:
                comment_rating_dict['createdAt'] = date_parse(cts)
            try:
                CommentRating.create(**comment_rating_dict)
            except Exception:
                # Show the offending payload, then re-raise with the
                # original traceback intact.
                print(comment_rating_dict)
                raise
            comment_dict['ratings'].append(comment_rating_dict)
    return comment_dict
def migrate_2stage(entry, id_map):
    """Second migration pass: set ``replyTo`` on an already migrated comment.

    Args:
        entry: legacy comment dict; its ``_id`` and optional ``replyTo``
            keys are read.
        id_map: mapping of legacy comment ids to new ``Comment.id`` values.

    Returns:
        None. Nothing is written when the entry is not a reply, when either
        side of the link is missing from ``id_map``, or when the comment row
        cannot be found.
    """
    old_reply_to = entry.get('replyTo')
    if not old_reply_to:
        return
    comment_id = id_map.get(entry['_id'])
    reply_to = id_map.get(old_reply_to)
    if comment_id is None or reply_to is None:
        # Unknown comment or unknown parent: there is no link to store, so
        # do not open a session at all.
        return
    with local_session() as session:
        comment = session.query(Comment).filter(Comment.id == comment_id).first()
        if comment is None:
            # .first() yields None when no row matches; skip instead of
            # failing on attribute assignment.
            return
        comment.replyTo = reply_to
        session.commit()

View File

@ -28,8 +28,5 @@ class Comment(Base):
shout: int = Column(ForeignKey("shout.id"), nullable=False, comment="Shout ID")
replyTo: int = Column(ForeignKey("comment.id"), nullable=True, comment="comment ID")
ratings = relationship(CommentRating, foreign_keys=CommentRating.comment_id)
old_id: str = Column(String, nullable = True)
old_thread: str = Column(String, nullable = True)
# TODO: work in progress, update this code