evernote validate

https://github.com/leanote/desktop-app/issues/73
This commit is contained in:
life
2015-10-27 10:27:53 +08:00
parent 11bb0d6604
commit da48d018eb
3 changed files with 64 additions and 23 deletions

View File

@@ -1,5 +1 @@
<<<<<<< HEAD {"version":"0.9","updatedTime":"2015-10-26T07:11:51.505Z"}
{"version":"0.8","updatedTime":"2015-10-24T07:11:51.505Z"}
=======
{"version":"0.9","updatedTime":"2015-10-16T07:11:51.505Z"}
>>>>>>> feature-export-from-evernote

View File

@@ -196,30 +196,64 @@ function stripUnsafeAttrs (str) {
module.exports.stripUnsafeAttrs = stripUnsafeAttrs; module.exports.stripUnsafeAttrs = stripUnsafeAttrs;
/**
 * Strip elements that are unsafe or that Evernote's ENML rejects.
 *
 * For every prohibited tag, a matching open/close pair is removed together
 * with everything between it; any remaining lone opening, closing or
 * self-closing prohibited tag is then removed on its own. Matching is
 * case-insensitive and anchored on a word boundary, so `<SCRIPT>` is caught
 * while look-alike names such as `<header>` or `<menuitem>` are left intact.
 *
 * NOTE(review): pair removal also discards the text inside elements such as
 * `<label>` or `<body>`; confirm that this content loss is intended.
 *
 * NOTE: at most two passes are made. Input nested deeply enough to survive
 * both passes is returned as-is, so the result is best-effort rather than a
 * guarantee that no prohibited tag remains.
 *
 * @param {string} str - HTML markup to clean.
 * @returns {string} The markup with prohibited elements removed; input that
 *   contains no prohibited tag is returned unchanged.
 */
function stripUnsafeTags (str) {
  var maxPasses = 2;
  var passes = 0;

  // Prohibited elements.
  var unsafeTags = [
    'wbr', 'style', 'comment', 'plaintext', 'xmp', 'listing',
    // The following are prohibited by Evernote (ENML).
    'applet', 'base', 'basefont', 'bgsound', 'blink', 'body', 'button', 'dir',
    'embed', 'fieldset', 'frameset', 'head', 'html', 'iframe', 'ilayer',
    'input', 'isindex', 'label', 'layer', 'legend', 'link', 'marquee', 'menu',
    'meta', 'noframes', 'noscript', 'object', 'optgroup', 'option', 'param',
    'script', 'select', 'textarea', 'xml'
  ];
  var alternation = unsafeTags.join('|');

  // Detects whether any prohibited opening tag is still present.
  var detectReg = new RegExp('<(?:' + alternation + ')\\b', 'i');

  // Lone opening, closing or self-closing tags, e.g. <input />.
  var loneTagReg = new RegExp('<\\/?(?:' + alternation + ')\\b[^>]*?>', 'gi');

  // Paired tags including their content, e.g. <script>...</script>.
  var pairRegs = unsafeTags.map(function (tag) {
    return new RegExp('<' + tag + '\\b[^>]*?>[\\s\\S]*?<\\/' + tag + '>', 'gi');
  });

  // Repeat so that tags revealed by a previous removal (maliciously nested
  // elements) are stripped as well, up to maxPasses times.
  while (detectReg.test(str) && passes++ < maxPasses) {
    for (var i = 0; i < pairRegs.length; ++i) {
      str = str.replace(pairRegs[i], '');
    }
    str = str.replace(loneTagReg, '');
  }

  return str;
}
module.exports.stripUnsafeTags = stripUnsafeTags; module.exports.stripUnsafeTags = stripUnsafeTags;

View File

@@ -7,6 +7,16 @@
* 1. 导出的文件有可能不能导入到evernote, 即使可以导入, 也有可能不能同步 * 1. 导出的文件有可能不能导入到evernote, 即使可以导入, 也有可能不能同步
* 原因: enml.dtd * 原因: enml.dtd
* 2. 导出markdown问题, 加一个<pre>markdown content</pre>. 导出的markdown没有图片 * 2. 导出markdown问题, 加一个<pre>markdown content</pre>. 导出的markdown没有图片
*
* https://dev.evernote.com/doc/articles/enml.php
Before submitting HTML content over the EDAM API the client application is expected to follow the following steps:
1. Convert the document into valid XML
2. Discard all tags that are not accepted by the ENML DTD
3. Convert tags to the proper ENML equivalent (e.g. BODY becomes EN-NOTE)
4. Validate against the ENML DTD
5. Validate href and src values to be valid URLs and protocols
*/ */
define(function() { define(function() {
var async = require('async'); var async = require('async');
@@ -149,6 +159,7 @@ define(function() {
me.fixResources(note.Content, function (content, resources) { me.fixResources(note.Content, function (content, resources) {
content = $('<div>' + content + '</div>').html(); content = $('<div>' + content + '</div>').html();
content = content.replace(/<br.*?>/g, '<br />'); content = content.replace(/<br.*?>/g, '<br />');
content = content.replace(/<hr.*?>/g, '<hr />');
info.resources = resources; info.resources = resources;
enml.ENMLOfHTML(content, function(err, ENML) { enml.ENMLOfHTML(content, function(err, ENML) {