comment and migration update

This commit is contained in:
Untone 2021-10-13 20:46:30 +03:00
parent c56577fb7d
commit cb40640b01
8 changed files with 107 additions and 81 deletions

View File

@ -7,6 +7,7 @@ from migration.tables.users import migrate as migrateUser
from migration.tables.content_items import get_metadata, migrate as migrateShout
from migration.tables.content_item_categories import migrate as migrateCategory
from migration.tables.tags import migrate as migrateTag
from migration.tables.comments import migrate as migrateComment
from migration.utils import DateTimeEncoder
from orm import Community
@ -53,8 +54,7 @@ def users():
del user['email']
export_data[user['slug']] = user
counter += 1
export_list = sorted(export_data.items(),
key=lambda item: item[1]['rating'])[-10:]
export_list = sorted(export_data.items(), key=lambda item: item[1]['rating'])[-10:]
open('migration/data/users.dict.json', 'w').write(json.dumps(newdata, cls=DateTimeEncoder)) # NOTE: by old_id
open('../src/data/authors.json', 'w').write(json.dumps(dict(export_list),
cls=DateTimeEncoder,
@ -143,19 +143,35 @@ def shouts():
def comments():
''' migrating comments on content items one by one '''
comments_data = json.loads(open('migration/data/comments.json').read())
content_data = json.loads(open('migration/data/content_items.json').read()) # old content
content_dict = { x['_id']: x for x in content_data } # by old _id
shouts_dict = json.loads(open('migration/data/shouts.dict.json', 'r').read()) # all shouts by slug
print(str(len(shouts_dict.keys())) + ' migrated shouts loaded')
shouts_old = { x['old_id']: x for slug, x in shouts_dict.items() } # shouts by old_id
print(str(len(content_data)) + ' content items loaded')
comments_data = json.loads(open('migration/data/comments.json').read()) # by slug
print(str(len(comments_data)) + ' comments loaded')
comments_by_post = {}
for comment in comments_data:
p = comment['contentItem']
comments_by_post[p] = comments_by_post.get(p, [])
comments_by_post[p].append(comment)
# group comments by old post id
for old_comment in comments_data:
cid = old_comment['contentItem']
comments_by_post[cid] = comments_by_post.get(cid, [])
comments_by_post[cid].append(old_comment)
# migrate comments
comments_by_shoutslug = {}
for content_item in content_data:
old_id = content_item['_id']
if content_item.get('commentedAt', False):
comments = [ migrateComment(c) for c in comments_by_post.get(old_id, []) ]
if comments.length > 0:
shout = shouts_old.get(old_id, { 'slug': 'abandoned-comments' })
comments_by_shoutslug[shout['slug']] = comments
export_articles = json.loads(open('../src/data/articles.json').read())
print(str(len(export_articles.items())) + ' articles were exported')
export_comments = {}
c = 0
for slug, article in export_articles.items():
comments = comments_by_post.get(slug, [])
comments = comments_by_shoutslug.get(slug, [])
if len(comments) > 0:
export_comments[slug] = comments
c += len(comments)
@ -171,13 +187,15 @@ def comments():
def export_shouts(limit):
print('reading json...')
newdata = json.loads(open('migration/data/shouts.dict.json', 'r').read())
print(str(len(newdata.keys())) + ' shouts loaded')
content_data = json.loads(open('migration/data/content_items.json').read())
content_dict = { x['_id']:x for x in content_data }
print(str(len(content_data)) + ' content items loaded')
newdata = json.loads(open('migration/data/shouts.dict.json', 'r').read())
print(str(len(newdata.keys())) + ' migrated shouts loaded')
users_old = json.loads(open('migration/data/users.dict.json').read())
print(str(len(newdata.keys())) + ' migrated users loaded')
export_authors = json.loads(open('../src/data/authors.json').read())
print(str(len(export_authors.items())) + ' pre-exported authors loaded')
print(str(len(export_authors.items())) + ' exported authors loaded')
users_slug = { u['slug']: u for old_id, u in users_old.items()}
print(str(len(users_slug.items())) + ' users loaded')
@ -204,23 +222,23 @@ def export_shouts(limit):
ensure_ascii=False))
print(str(len(export_clean.items())) + ' articles exported')
open('../src/data/authors.json', 'w').write(json.dumps(export_authors,
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
cls=DateTimeEncoder,
indent=4,
sort_keys=True,
ensure_ascii=False))
comments()
print(str(len(export_authors.items())) + ' total authors exported')
def export_slug(slug):
shouts_dict = json.loads(open('migration/data/shouts.dict.json').read())
print(str(len(shouts_dict.items())) + ' shouts loaded')
users_old = json.loads(open('migration/data/users.dict.json').read())
print(str(len(users_old.items())) + ' users loaded')
print(str(len(shouts_dict.items())) + ' migrated shouts loaded')
users_old = json.loads(open('migration/data/users.dict.json').read()) # NOTE: this exact file is by old_id
print(str(len(users_old.items())) + ' migrated users loaded')
users_dict = { x[1]['slug']:x for x in users_old.items() }
exported_authors = json.loads(open('../src/data/authors.json').read())
print(str(len(exported_authors.items())) + ' authors were exported before')
print(str(len(exported_authors.items())) + ' exported authors loaded')
exported_articles = json.loads(open('../src/data/articles.json').read())
print(str(len(exported_articles.items())) + ' articles were exported before')
print(str(len(exported_articles.items())) + ' exported articles loaded')
shout = shouts_dict.get(slug, False)
if shout:
author = users_dict.get(shout['authors'][0]['slug'], None)
@ -239,7 +257,7 @@ def export_slug(slug):
ensure_ascii=False))
else:
print('no old id error!')
print(str(len(shouts_dict)) + ' shouts were migrated')
# print(str(len(shouts_dict)) + ' shouts were migrated')
print(slug)
comments()
print('finished.')
@ -252,8 +270,6 @@ if __name__ == '__main__':
users()
elif sys.argv[1] == "topics":
topics()
elif sys.argv[1] == "comments":
comments()
elif sys.argv[1] == "shouts":
try:
Community.create(**{
@ -266,17 +282,20 @@ if __name__ == '__main__':
except Exception:
pass
shouts()
elif sys.argv[1] == "comments":
comments()
elif sys.argv[1] == "export_shouts":
limit = int(sys.argv[2]) if len(sys.argv) > 2 else None
export_shouts(limit)
limit = int(sys.argv[2]) if len(sys.argv) > 2 else None
export_shouts(limit)
elif sys.argv[1] == "all":
users()
topics()
shouts()
comments()
elif sys.argv[1] == "bson":
from migration import bson2json
bson2json.json_tables()
elif sys.argv[1] == 'slug':
export_slug(sys.argv[2])
else:
print('usage: python migrate.py <bson|slug|topics|users|shouts|export_shouts [num]|slug [str]|all>')
print('usage: python migrate.py bson\n.. \ttopics <limit>\n.. \tusers <limit>\n.. \tshouts <limit>\n.. \tcomments\n.. \texport_shouts <limit>\n.. \tslug <slug>\n.. \tall>')

View File

@ -388,7 +388,7 @@ class HTML2Text(HTMLParser.HTMLParser):
parent_style = {}
if start:
if self.tag_stack:
parent_style = self.tag_stack[-1][2]
parent_style = self.tag_stack[-1][2]
tag_style = element_style(attrs, self.style_def, parent_style)
self.tag_stack.append((tag, attrs, tag_style))
else:
@ -418,7 +418,7 @@ class HTML2Text(HTMLParser.HTMLParser):
elif attrs['class'] == 'lead':
self.o('==') # NOTE: but CriticMarkup uses {== ==}
self.span_lead = True
elif self.:
else:
if self.span_hightlight:
self.o('`')
self.span_hightlight = False
@ -600,7 +600,7 @@ class HTML2Text(HTMLParser.HTMLParser):
# if self.google_doc:
# prevent white space immediately after 'begin emphasis' marks ('**' and '_')
lstripped_data = data.lstrip()
if self.drop_white_space and not (self.pre or self.code or self.span_hightlight or self.span_lead):
if self.drop_white_space and not (self.pre or self.code):
data = lstripped_data
if puredata: # and not self.pre:
data = re.sub('\s+', ' ', data)

View File

@ -1,12 +1,13 @@
from dateutil.parser import parse as date_parse
import json
import datetime
from os.path import abspath
from orm import Shout, Comment, CommentRating, User
from orm.base import local_session
from migration.html2text import html2text
users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read())
topics_dict = json.loads(open(abspath('migration/data/topics.dict.json')).read()) # old_id keyed
# users_dict = json.loads(open(abspath('migration/data/users.dict.json')).read())
# topics_dict = json.loads(open(abspath('migration/data/topics.dict.json')).read()) # old_id keyed
def migrate(entry):
'''
@ -39,39 +40,46 @@ def migrate(entry):
deletedAt: DateTime
deletedBy: Int
rating: Int
ratigns: [Rating]
ratigns: [CommentRating]
views: Int
old_id: String
old_thread: String
}
'''
with local_session() as session:
shout_id = session.query(Shout).filter(Shout.old_id == entry['_id']).first()
author_dict = users_dict[entry['createdBy']]
print(author_dict)
author_id = author_dict['id']
shout = session.query(Shout).filter(Shout.old_id == entry['_id']).first()
if not shout: print(entry)
assert shout, '=== NO SHOUT IN COMMENT ERROR ==='
author = session.query(User).filter(User.old_id == entry['_id']).first()
comment_dict = {
'old_id': entry['_id'],
'author': author_id,
'author': author.id if author else 0,
'createdAt': date_parse(entry['createdAt']),
'body': html2text(entry['body']),
'shout': shout_id
'shout': shout
}
if 'rating' in entry:
comment_dict['rating'] = entry['rating']
if 'deleted' in entry:
comment_dict['deleted'] = entry['deleted']
if entry.get('deleted'):
comment_dict['deletedAt'] = entry['updatedAt']
comment_dict['deletedBy'] = entry['updatedBy']
if 'thread' in entry:
comment_dict['old_thread'] = entry['thread']
print(entry.keys())
# print(entry.keys())
comment = Comment.create(**comment_dict)
for comment_rating_old in entry.get('ratings',[]):
rater_id = session.query(User).filter(User.old_id == comment_rating_old['createdBy']).first()
comment_rating_dict = {
'value': cr['value'],
'createdBy': rater_id,
'createdAt': date_parse(comment_rating_old['createdAt']) or ts
'value': comment_rating_old['value'],
'createdBy': rater_id or 0,
'createdAt': comment_rating_old.get('createdAt', datetime.datetime.now()),
'comment_id': comment.id
}
comment_rating = CommentRating.create(**comment_rating_dict)
comment['ratings'].append(comment_rating)
try:
comment_rating = CommentRating.create(**comment_rating_dict)
# TODO: comment rating append resolver
# comment['ratings'].append(comment_rating)
except Exception as e:
print(comment_rating)
pass # raise e
return comment

View File

@ -9,7 +9,7 @@ def migrate(entry):
children: [String] # and children
}
'''
return {
topic_dict = {
'slug': entry['slug'],
'createdBy': entry['createdBy'], # NOTE: uses an old user id
'createdAt': entry['createdAt'],
@ -17,4 +17,11 @@ def migrate(entry):
'parents': [],
'children': [],
'old_id': entry['_id']
}
}
with local_session() as session:
topic = session.query(Topic).filter(Topic.slug == topic_slug).first()
if not topic:
topic = Topic.create(**topic_dict)
topic_dict['id'] = topic.id
return topic_dict

View File

@ -184,6 +184,7 @@ def migrate(entry):
with local_session() as session:
user = session.query(User).filter(User.slug == slug).first()
r['authors'].append({
'id': user.id,
'slug': slug,
'name': name,
'userpic': userpic
@ -197,7 +198,7 @@ def migrate(entry):
if entry['published']:
ext = 'md'
open('migration/content/' +
r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
try:
shout_dict = r.copy()
shout_dict['authors'] = [user, ]
@ -211,21 +212,6 @@ def migrate(entry):
else:
shout_dict['publishedAt'] = ts
del shout_dict['published']
# shout comments
if entry.get('commentedAt', False):
try:
old_comments = comments_by_post.get(shout_dict['old_id'], [])
if len(old_comments) > 0:
shout_dict['comments'] = []
# migrate comments
for entry in old_comments:
comment = migrateComment(entry)
shout_dict['comments'].append(comment)
except KeyError:
print(shout_dict.keys())
raise 'error'
try:
topic_slugs = shout_dict['topics']
@ -248,18 +234,18 @@ def migrate(entry):
)
shout.ratings.append(shout_rating.id)
'''
# adding topics to created shout
for topic_slug in topic_slugs:
topic_dict = topics_dict.get(topic_slug)
if topic_dict:
topic = Topic.create(**topic_dict)
if not topic:
topic_dict = topics_dict.get(topic_slug)
if topic_dict:
topic = Topic.create(**topic_dict)
shout.topics = [ topic, ]
shout.save()
except Exception as e:
r['error'] = 'db error'
# pass
raise e
r['error'] = 'db error'
# pass
raise e
except Exception as e:
if not r['body']: r['body'] = 'body moved'
raise e

View File

@ -81,8 +81,6 @@ def migrate(entry, limit=668):
res['slug'] = res['email'].split('@')[0]
else:
old = res['old_id']
del res['old_id']
user = User.create(**res.copy())
res['id'] = user.id
res['old_id'] = old
return res

View File

@ -10,9 +10,9 @@ class CommentRating(Base):
__tablename__ = "comment_rating"
id = None
rater_id = Column(ForeignKey('user.id'), primary_key = True)
comment_id = Column(ForeignKey('comment.id'), primary_key = True)
ts: str = Column(DateTime, nullable=False, default = datetime.now, comment="Timestamp")
createdBy = Column(ForeignKey('user.id'), primary_key = True)
createdAt: str = Column(DateTime, nullable=False, default = datetime.now, comment="Timestamp")
value = Column(Integer)
class Comment(Base):
@ -28,7 +28,7 @@ class Comment(Base):
rating: int = Column(Integer, nullable=True, comment="Comment Rating")
ratings = relationship(CommentRating, foreign_keys=CommentRating.comment_id)
old_id: str = Column(String, nullable = True)
deleted: bool = Column(Boolean, nullable = True)
old_thread: str = Column(String, nullable = True)
# TODO: work in progress, update this code

View File

@ -152,7 +152,7 @@ type UserNotification {
type User {
id: Int!
username: String! # email
username: String! # to login, ex. email
createdAt: DateTime!
slug: String!
name: String # to display
@ -196,10 +196,18 @@ type Comment {
deletedAt: DateTime
deletedBy: Int
rating: Int
ratigns: [Rating]
ratigns: [CommentRating]
views: Int
old_id: String
deleted: Boolean
old_thread: String
}
type CommentRating {
id: Int!
comment_id: Int!
createdBy: Int!
createdAt: DateTime!
value: Int!
}
# is publication