comment and migration update

This commit is contained in:
Untone 2021-10-13 20:46:30 +03:00
parent c56577fb7d
commit cb40640b01
8 changed files with 107 additions and 81 deletions

View File

@ -7,6 +7,7 @@ from migration.tables.users import migrate as migrateUser
from migration.tables.content_items import get_metadata, migrate as migrateShout from migration.tables.content_items import get_metadata, migrate as migrateShout
from migration.tables.content_item_categories import migrate as migrateCategory from migration.tables.content_item_categories import migrate as migrateCategory
from migration.tables.tags import migrate as migrateTag from migration.tables.tags import migrate as migrateTag
from migration.tables.comments import migrate as migrateComment
from migration.utils import DateTimeEncoder from migration.utils import DateTimeEncoder
from orm import Community from orm import Community
@ -53,8 +54,7 @@ def users():
del user['email'] del user['email']
export_data[user['slug']] = user export_data[user['slug']] = user
counter += 1 counter += 1
export_list = sorted(export_data.items(), export_list = sorted(export_data.items(), key=lambda item: item[1]['rating'])[-10:]
key=lambda item: item[1]['rating'])[-10:]
open('migration/data/users.dict.json', 'w').write(json.dumps(newdata, cls=DateTimeEncoder)) # NOTE: by old_id open('migration/data/users.dict.json', 'w').write(json.dumps(newdata, cls=DateTimeEncoder)) # NOTE: by old_id
open('../src/data/authors.json', 'w').write(json.dumps(dict(export_list), open('../src/data/authors.json', 'w').write(json.dumps(dict(export_list),
cls=DateTimeEncoder, cls=DateTimeEncoder,
@ -143,19 +143,35 @@ def shouts():
def comments(): def comments():
''' migrating comments on content items one by one ''' ''' migrating comments on content items one by one '''
comments_data = json.loads(open('migration/data/comments.json').read()) content_data = json.loads(open('migration/data/content_items.json').read()) # old content
content_dict = { x['_id']: x for x in content_data } # by slug
shouts_dict = json.loads(open('migration/data/shouts.dict.json', 'r').read()) # all shouts by slug
print(str(len(shouts_dict.keys())) + ' migrated shouts loaded')
shouts_old = { x['old_id']: x for slug, x in shouts_dict.items() } # shouts by old_id
print(str(len(content_data)) + ' content items loaded')
comments_data = json.loads(open('migration/data/comments.json').read()) # by slug
print(str(len(comments_data)) + ' comments loaded') print(str(len(comments_data)) + ' comments loaded')
comments_by_post = {} comments_by_post = {}
for comment in comments_data: # sort comments by old posts ids
p = comment['contentItem'] for old_comment in comments_data:
comments_by_post[p] = comments_by_post.get(p, []) cid = old_comment['contentItem']
comments_by_post[p].append(comment) comments_by_post[cid] = comments_by_post.get(cid, [])
comments_by_post[cid].append(old_comment)
# migrate comments
comments_by_shoutslug = {}
for content_item in content_data:
old_id = content_item['_id']
if content_item.get('commentedAt', False):
comments = [ migrateComment(c) for c in comments_by_post.get(old_id, []) ]
if comments.length > 0:
shout = shouts_old.get(old_id, { 'slug': 'abandoned-comments' })
comments_by_shoutslug[shout['slug']] = comments
export_articles = json.loads(open('../src/data/articles.json').read()) export_articles = json.loads(open('../src/data/articles.json').read())
print(str(len(export_articles.items())) + ' articles were exported') print(str(len(export_articles.items())) + ' articles were exported')
export_comments = {} export_comments = {}
c = 0 c = 0
for slug, article in export_articles.items(): for slug, article in export_articles.items():
comments = comments_by_post.get(slug, []) comments = comments_by_shoutslug.get(slug, [])
if len(comments) > 0: if len(comments) > 0:
export_comments[slug] = comments export_comments[slug] = comments
c += len(comments) c += len(comments)
@ -171,13 +187,15 @@ def comments():
def export_shouts(limit): def export_shouts(limit):
print('reading json...') print('reading json...')
newdata = json.loads(open('migration/data/shouts.dict.json', 'r').read())
print(str(len(newdata.keys())) + ' shouts loaded')
content_data = json.loads(open('migration/data/content_items.json').read()) content_data = json.loads(open('migration/data/content_items.json').read())
content_dict = { x['_id']:x for x in content_data } content_dict = { x['_id']:x for x in content_data }
print(str(len(content_data)) + ' content items loaded')
newdata = json.loads(open('migration/data/shouts.dict.json', 'r').read())
print(str(len(newdata.keys())) + ' migrated shouts loaded')
users_old = json.loads(open('migration/data/users.dict.json').read()) users_old = json.loads(open('migration/data/users.dict.json').read())
print(str(len(newdata.keys())) + ' migrated users loaded')
export_authors = json.loads(open('../src/data/authors.json').read()) export_authors = json.loads(open('../src/data/authors.json').read())
print(str(len(export_authors.items())) + ' pre-exported authors loaded') print(str(len(export_authors.items())) + ' exported authors loaded')
users_slug = { u['slug']: u for old_id, u in users_old.items()} users_slug = { u['slug']: u for old_id, u in users_old.items()}
print(str(len(users_slug.items())) + ' users loaded') print(str(len(users_slug.items())) + ' users loaded')
@ -204,23 +222,23 @@ def export_shouts(limit):
ensure_ascii=False)) ensure_ascii=False))
print(str(len(export_clean.items())) + ' articles exported') print(str(len(export_clean.items())) + ' articles exported')
open('../src/data/authors.json', 'w').write(json.dumps(export_authors, open('../src/data/authors.json', 'w').write(json.dumps(export_authors,
cls=DateTimeEncoder, cls=DateTimeEncoder,
indent=4, indent=4,
sort_keys=True, sort_keys=True,
ensure_ascii=False)) ensure_ascii=False))
comments() comments()
print(str(len(export_authors.items())) + ' total authors exported') print(str(len(export_authors.items())) + ' total authors exported')
def export_slug(slug): def export_slug(slug):
shouts_dict = json.loads(open('migration/data/shouts.dict.json').read()) shouts_dict = json.loads(open('migration/data/shouts.dict.json').read())
print(str(len(shouts_dict.items())) + ' shouts loaded') print(str(len(shouts_dict.items())) + ' migrated shouts loaded')
users_old = json.loads(open('migration/data/users.dict.json').read()) users_old = json.loads(open('migration/data/users.dict.json').read()) # NOTE: this exact file is by old_id
print(str(len(users_old.items())) + ' users loaded') print(str(len(users_old.items())) + ' migrated users loaded')
users_dict = { x[1]['slug']:x for x in users_old.items() } users_dict = { x[1]['slug']:x for x in users_old.items() }
exported_authors = json.loads(open('../src/data/authors.json').read()) exported_authors = json.loads(open('../src/data/authors.json').read())
print(str(len(exported_authors.items())) + ' authors were exported before') print(str(len(exported_authors.items())) + ' exported authors loaded')
exported_articles = json.loads(open('../src/data/articles.json').read()) exported_articles = json.loads(open('../src/data/articles.json').read())
print(str(len(exported_articles.items())) + ' articles were exported before') print(str(len(exported_articles.items())) + ' exported articles loaded')
shout = shouts_dict.get(slug, False) shout = shouts_dict.get(slug, False)
if shout: if shout:
author = users_dict.get(shout['authors'][0]['slug'], None) author = users_dict.get(shout['authors'][0]['slug'], None)
@ -239,7 +257,7 @@ def export_slug(slug):
ensure_ascii=False)) ensure_ascii=False))
else: else:
print('no old id error!') print('no old id error!')
print(str(len(shouts_dict)) + ' shouts were migrated') # print(str(len(shouts_dict)) + ' shouts were migrated')
print(slug) print(slug)
comments() comments()
print('finished.') print('finished.')
@ -252,8 +270,6 @@ if __name__ == '__main__':
users() users()
elif sys.argv[1] == "topics": elif sys.argv[1] == "topics":
topics() topics()
elif sys.argv[1] == "comments":
comments()
elif sys.argv[1] == "shouts": elif sys.argv[1] == "shouts":
try: try:
Community.create(**{ Community.create(**{
@ -266,17 +282,20 @@ if __name__ == '__main__':
except Exception: except Exception:
pass pass
shouts() shouts()
elif sys.argv[1] == "comments":
comments()
elif sys.argv[1] == "export_shouts": elif sys.argv[1] == "export_shouts":
limit = int(sys.argv[2]) if len(sys.argv) > 2 else None limit = int(sys.argv[2]) if len(sys.argv) > 2 else None
export_shouts(limit) export_shouts(limit)
elif sys.argv[1] == "all": elif sys.argv[1] == "all":
users() users()
topics() topics()
shouts() shouts()
comments()
elif sys.argv[1] == "bson": elif sys.argv[1] == "bson":
from migration import bson2json from migration import bson2json
bson2json.json_tables() bson2json.json_tables()
elif sys.argv[1] == 'slug': elif sys.argv[1] == 'slug':
export_slug(sys.argv[2]) export_slug(sys.argv[2])
else: else:
print('usage: python migrate.py <bson|slug|topics|users|shouts|export_shouts [num]|slug [str]|all>') print('usage: python migrate.py bson\n.. \ttopics <limit>\n.. \tusers <limit>\n.. \tshouts <limit>\n.. \tcomments\n.. \texport_shouts <limit>\n.. \tslug <slug>\n.. \tall>')

View File

@ -388,7 +388,7 @@ class HTML2Text(HTMLParser.HTMLParser):
parent_style = {} parent_style = {}
if start: if start:
if self.tag_stack: if self.tag_stack:
parent_style = self.tag_stack[-1][2] parent_style = self.tag_stack[-1][2]
tag_style = element_style(attrs, self.style_def, parent_style) tag_style = element_style(attrs, self.style_def, parent_style)
self.tag_stack.append((tag, attrs, tag_style)) self.tag_stack.append((tag, attrs, tag_style))
else: else:
@ -418,7 +418,7 @@ class HTML2Text(HTMLParser.HTMLParser):
elif attrs['class'] == 'lead': elif attrs['class'] == 'lead':
self.o('==') # NOTE: but CriticMarkup uses {== ==} self.o('==') # NOTE: but CriticMarkup uses {== ==}
self.span_lead = True self.span_lead = True
elif self.: else:
if self.span_hightlight: if self.span_hightlight:
self.o('`') self.o('`')
self.span_hightlight = False self.span_hightlight = False
@ -600,7 +600,7 @@ class HTML2Text(HTMLParser.HTMLParser):
# if self.google_doc: # if self.google_doc:
# prevent white space immediately after 'begin emphasis' marks ('**' and '_') # prevent white space immediately after 'begin emphasis' marks ('**' and '_')
lstripped_data = data.lstrip() lstripped_data = data.lstrip()
if self.drop_white_space and not (self.pre or self.code or self.span_hightlight or self.span_lead): if self.drop_white_space and not (self.pre or self.code):
data = lstripped_data data = lstripped_data
if puredata: # and not self.pre: if puredata: # and not self.pre:
data = re.sub('\s+', ' ', data) data = re.sub('\s+', ' ', data)

View File

@ -1,12 +1,13 @@
from dateutil.parser import parse as date_parse from dateutil.parser import parse as date_parse
import json import json
import datetime
from os.path import abspath from os.path import abspath
from orm import Shout, Comment, CommentRating, User from orm import Shout, Comment, CommentRating, User
from orm.base import local_session from orm.base import local_session
from migration.html2text import html2text from migration.html2text import html2text
users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read()) # users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read())
topics_dict = json.loads(open(abspath('migration/data/topics.dict.json')).read()) # old_id keyed # topics_dict = json.loads(open(abspath('migration/data/topics.dict.json')).read()) # old_id keyed
def migrate(entry): def migrate(entry):
''' '''
@ -39,39 +40,46 @@ def migrate(entry):
deletedAt: DateTime deletedAt: DateTime
deletedBy: Int deletedBy: Int
rating: Int rating: Int
ratigns: [Rating] ratigns: [CommentRating]
views: Int views: Int
old_id: String old_id: String
old_thread: String
} }
''' '''
with local_session() as session: with local_session() as session:
shout_id = session.query(Shout).filter(Shout.old_id == entry['_id']).first() shout = session.query(Shout).filter(Shout.old_id == entry['_id']).first()
author_dict = users_dict[entry['createdBy']] if not shout: print(entry)
print(author_dict) assert shout, '=== NO SHOUT IN COMMENT ERROR ==='
author_id = author_dict['id'] author = session.query(User).filter(User.old_id == entry['_id']).first()
comment_dict = { comment_dict = {
'old_id': entry['_id'], 'old_id': entry['_id'],
'author': author_id, 'author': author.id if author else 0,
'createdAt': date_parse(entry['createdAt']), 'createdAt': date_parse(entry['createdAt']),
'body': html2text(entry['body']), 'body': html2text(entry['body']),
'shout': shout_id 'shout': shout
} }
if 'rating' in entry: if 'rating' in entry:
comment_dict['rating'] = entry['rating'] comment_dict['rating'] = entry['rating']
if 'deleted' in entry: if entry.get('deleted'):
comment_dict['deleted'] = entry['deleted'] comment_dict['deletedAt'] = entry['updatedAt']
comment_dict['deletedBy'] = entry['updatedBy']
if 'thread' in entry: if 'thread' in entry:
comment_dict['old_thread'] = entry['thread'] comment_dict['old_thread'] = entry['thread']
print(entry.keys()) # print(entry.keys())
comment = Comment.create(**comment_dict) comment = Comment.create(**comment_dict)
for comment_rating_old in entry.get('ratings',[]): for comment_rating_old in entry.get('ratings',[]):
rater_id = session.query(User).filter(User.old_id == comment_rating_old['createdBy']).first() rater_id = session.query(User).filter(User.old_id == comment_rating_old['createdBy']).first()
comment_rating_dict = { comment_rating_dict = {
'value': cr['value'], 'value': comment_rating_old['value'],
'createdBy': rater_id, 'createdBy': rater_id or 0,
'createdAt': date_parse(comment_rating_old['createdAt']) or ts 'createdAt': comment_rating_old.get('createdAt', datetime.datetime.now()),
'comment_id': comment.id
} }
comment_rating = CommentRating.create(**comment_rating_dict) try:
comment['ratings'].append(comment_rating) comment_rating = CommentRating.create(**comment_rating_dict)
# TODO: comment rating append resolver
# comment['ratings'].append(comment_rating)
except Exception as e:
print(comment_rating)
pass # raise e
return comment return comment

View File

@ -9,7 +9,7 @@ def migrate(entry):
children: [String] # and children children: [String] # and children
} }
''' '''
return { topic_dict = {
'slug': entry['slug'], 'slug': entry['slug'],
'createdBy': entry['createdBy'], # NOTE: uses an old user id 'createdBy': entry['createdBy'], # NOTE: uses an old user id
'createdAt': entry['createdAt'], 'createdAt': entry['createdAt'],
@ -18,3 +18,10 @@ def migrate(entry):
'children': [], 'children': [],
'old_id': entry['_id'] 'old_id': entry['_id']
} }
with local_session() as session:
topic = session.query(Topic).filter(Topic.slug == topic_slug).first()
if not topic:
topic = Topic.create(**topic_dict)
topic_dict['id'] = topic.id
return topic_dict

View File

@ -184,6 +184,7 @@ def migrate(entry):
with local_session() as session: with local_session() as session:
user = session.query(User).filter(User.slug == slug).first() user = session.query(User).filter(User.slug == slug).first()
r['authors'].append({ r['authors'].append({
'id': user.id,
'slug': slug, 'slug': slug,
'name': name, 'name': name,
'userpic': userpic 'userpic': userpic
@ -197,7 +198,7 @@ def migrate(entry):
if entry['published']: if entry['published']:
ext = 'md' ext = 'md'
open('migration/content/' + open('migration/content/' +
r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content) r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
try: try:
shout_dict = r.copy() shout_dict = r.copy()
shout_dict['authors'] = [user, ] shout_dict['authors'] = [user, ]
@ -212,21 +213,6 @@ def migrate(entry):
shout_dict['publishedAt'] = ts shout_dict['publishedAt'] = ts
del shout_dict['published'] del shout_dict['published']
# shout comments
if entry.get('commentedAt', False):
try:
old_comments = comments_by_post.get(shout_dict['old_id'], [])
if len(old_comments) > 0:
shout_dict['comments'] = []
# migrate comments
for entry in old_comments:
comment = migrateComment(entry)
shout_dict['comments'].append(comment)
except KeyError:
print(shout_dict.keys())
raise 'error'
try: try:
topic_slugs = shout_dict['topics'] topic_slugs = shout_dict['topics']
del shout_dict['topics'] # FIXME: AttributeError: 'str' object has no attribute '_sa_instance_state' del shout_dict['topics'] # FIXME: AttributeError: 'str' object has no attribute '_sa_instance_state'
@ -248,18 +234,18 @@ def migrate(entry):
) )
shout.ratings.append(shout_rating.id) shout.ratings.append(shout_rating.id)
''' '''
# adding topics to created shout
for topic_slug in topic_slugs: for topic_slug in topic_slugs:
topic_dict = topics_dict.get(topic_slug) if not topic:
if topic_dict: topic_dict = topics_dict.get(topic_slug)
topic = Topic.create(**topic_dict) if topic_dict:
topic = Topic.create(**topic_dict)
shout.topics = [ topic, ] shout.topics = [ topic, ]
shout.save() shout.save()
except Exception as e: except Exception as e:
r['error'] = 'db error' r['error'] = 'db error'
# pass # pass
raise e raise e
except Exception as e: except Exception as e:
if not r['body']: r['body'] = 'body moved' if not r['body']: r['body'] = 'body moved'
raise e raise e

View File

@ -81,8 +81,6 @@ def migrate(entry, limit=668):
res['slug'] = res['email'].split('@')[0] res['slug'] = res['email'].split('@')[0]
else: else:
old = res['old_id'] old = res['old_id']
del res['old_id']
user = User.create(**res.copy()) user = User.create(**res.copy())
res['id'] = user.id res['id'] = user.id
res['old_id'] = old
return res return res

View File

@ -10,9 +10,9 @@ class CommentRating(Base):
__tablename__ = "comment_rating" __tablename__ = "comment_rating"
id = None id = None
rater_id = Column(ForeignKey('user.id'), primary_key = True)
comment_id = Column(ForeignKey('comment.id'), primary_key = True) comment_id = Column(ForeignKey('comment.id'), primary_key = True)
ts: str = Column(DateTime, nullable=False, default = datetime.now, comment="Timestamp") createdBy = Column(ForeignKey('user.id'), primary_key = True)
createdAt: str = Column(DateTime, nullable=False, default = datetime.now, comment="Timestamp")
value = Column(Integer) value = Column(Integer)
class Comment(Base): class Comment(Base):
@ -28,7 +28,7 @@ class Comment(Base):
rating: int = Column(Integer, nullable=True, comment="Comment Rating") rating: int = Column(Integer, nullable=True, comment="Comment Rating")
ratings = relationship(CommentRating, foreign_keys=CommentRating.comment_id) ratings = relationship(CommentRating, foreign_keys=CommentRating.comment_id)
old_id: str = Column(String, nullable = True) old_id: str = Column(String, nullable = True)
deleted: bool = Column(Boolean, nullable = True) old_thread: str = Column(String, nullable = True)
# TODO: work in progress, update this code

View File

@ -152,7 +152,7 @@ type UserNotification {
type User { type User {
id: Int! id: Int!
username: String! # email username: String! # to login, ex. email
createdAt: DateTime! createdAt: DateTime!
slug: String! slug: String!
name: String # to display name: String # to display
@ -196,10 +196,18 @@ type Comment {
deletedAt: DateTime deletedAt: DateTime
deletedBy: Int deletedBy: Int
rating: Int rating: Int
ratigns: [Rating] ratigns: [CommentRating]
views: Int views: Int
old_id: String old_id: String
deleted: Boolean old_thread: String
}
type CommentRating {
id: Int!
comment_id: Int!
createdBy: Int!
createdAt: DateTime!
value: Int!
} }
# is publication # is publication