From a188af70b529d36e598e1c41be79e6056bf5927b Mon Sep 17 00:00:00 2001 From: Gabriel Dunne Date: Sat, 6 Apr 2013 18:54:02 -0700 Subject: [PATCH] Updating some scraping scripts. --- Procfile | 1 + public/css/style.css | 5 ++ public/js/script.js | 61 ++++++++----------- scripts/lines-to-json.js | 4 +- ...om-file.js => phantom-scrape-from-file.js} | 0 ...phantom-scrape.js => phantom-scrape-id.js} | 0 scripts/phantom-scrape-thumbnail.js | 11 ---- ...nfig.js => pjscrape-info_scrape_config.js} | 2 +- ...fig.js => pjscrape-thumb_scrape_config.js} | 0 scripts/scrape-id.js | 16 ----- views/index.ejs | 27 +++++++- 11 files changed, 60 insertions(+), 67 deletions(-) create mode 100644 Procfile rename scripts/{scrape-from-file.js => phantom-scrape-from-file.js} (100%) rename scripts/{phantom-scrape.js => phantom-scrape-id.js} (100%) delete mode 100644 scripts/phantom-scrape-thumbnail.js rename scripts/{my_config.js => pjscrape-info_scrape_config.js} (99%) rename scripts/{thumb_scrape_config.js => pjscrape-thumb_scrape_config.js} (100%) delete mode 100644 scripts/scrape-id.js diff --git a/Procfile b/Procfile new file mode 100644 index 0000000..e1d4131 --- /dev/null +++ b/Procfile @@ -0,0 +1 @@ +web: node app.js diff --git a/public/css/style.css b/public/css/style.css index b9b86ea..6a4613f 100644 --- a/public/css/style.css +++ b/public/css/style.css @@ -41,6 +41,11 @@ ul.thumbs li img { width:160px; height:110px; } + +#loader { + margin-left:300px; +} + #scrim { background:rgba(0,0,0,0.8); width:100%; diff --git a/public/js/script.js b/public/js/script.js index e2b4baa..653c2f5 100644 --- a/public/js/script.js +++ b/public/js/script.js @@ -1,27 +1,10 @@ var extended_json = {}; -var on_filter_change = function() { - var query = $('#filter').val().toLowerCase(); - var d = jQuery.grep(extended_json.clips, function(clip, i) { - if (clip.id.toLowerCase().indexOf(query) >= 0 || - (clip.description && clip.description.toLowerCase().indexOf(query) >= 0)) - return true; - if (clip.subject) - for (var j = 0; j < clip.subject.length; j++) - if (clip.subject[j].toLowerCase().indexOf(query) >= 0) - return true; - return false; - }); - $('.thumbs li').hide(); - for (var i = 0; i < d.length; i++) { - $('.thumbs li#' + d[i].id).show(); - } -} - $(document).ready(function() { - $('#facets a').each(function(key, elem) { + $('#thumbs').hide(); + $('#facets a').each(function(key, elem) { $(elem).click(function() { $('#filter').val($(elem).text()); on_filter_change(); @@ -32,13 +15,12 @@ $(document).ready(function() { $('#filter').val(""); on_filter_change(); }); - $('#filter').hide(); $('#filter').keyup(on_filter_change); - $.getJSON('./data/prelinger_extended-search.json', function(data) { extended_json = data; - $('#filter').show(); + $('#loader').hide(); + $('#thumbs').show(); }); $('#scrim').click(function() { @@ -60,13 +42,9 @@ $(document).ready(function() { var thumbs_url = $(elem).attr('data-thumbs-url'); link.click(function() { - scrim(1, function() { - - $('#subcontent .container').empty(); - var offset = 225; - + $('#subcontent .container').empty(); $('#subcontent').css({ width : window.innerWidth - offset + "px", height : window.innerHeight - offset + "px", @@ -74,9 +52,9 @@ $(document).ready(function() { left : offset / 2 }) .fadeIn(100); - $('#subcontent .container').text("loading..."); + // scrape archive.org for thumbs to insert $.getJSON('/thumbs/?url=' + thumbs_url, function(data) { $('#subcontent .container').empty(); var destElem = '#subcontent .container'; @@ -90,15 +68,11 @@ $(document).ready(function() { .appendTo(destElem + ' ul.thumbs') .click(function(){ var c = $('#subcontent .container'); - //thumb.url c.html(''); - - }) }); }); - }); return false; }); @@ -108,14 +82,33 @@ $(document).ready(function() { link.mouseout(function() { img.attr('src', 'gifs/prelinger_static/' + thumb_name); }); - }); }); + +var on_filter_change = function() { + var query = $('#filter').val().toLowerCase(); + var d = jQuery.grep(extended_json.clips, function(clip, i) { + if (clip.id.toLowerCase().indexOf(query) >= 0 || + (clip.description && clip.description.toLowerCase().indexOf(query) >= 0)) + return true; + if (clip.subject) + for (var j = 0; j < clip.subject.length; j++) + if (clip.subject[j].toLowerCase().indexOf(query) >= 0) + return true; + return false; + }); + $('.thumbs li').hide(); + for (var i = 0; i < d.length; i++) { + $('.thumbs li#' + d[i].id).show(); + } +}; + + var scrim = function(visible, callback) { var s = $('#scrim'); if (visible == 1) s.fadeIn(150, callback); else s.fadeOut(150, callback); -} +}; diff --git a/scripts/lines-to-json.js b/scripts/lines-to-json.js index 22ba61f..fe72376 100644 --- a/scripts/lines-to-json.js +++ b/scripts/lines-to-json.js @@ -3,8 +3,8 @@ var fs = require("fs"); console.log('{ identifiers : ['); fs.readFileSync('./prelinger_identifiers.txt').toString().split('\n').forEach( -function (line) { - console.log('"'+line + '",'); + function (line) { + console.log('"'+line + '",'); } ); console.log(']}'); diff --git a/scripts/scrape-from-file.js b/scripts/phantom-scrape-from-file.js similarity index 100% rename from scripts/scrape-from-file.js rename to scripts/phantom-scrape-from-file.js diff --git a/scripts/phantom-scrape.js b/scripts/phantom-scrape-id.js similarity index 100% rename from scripts/phantom-scrape.js rename to scripts/phantom-scrape-id.js diff --git a/scripts/phantom-scrape-thumbnail.js b/scripts/phantom-scrape-thumbnail.js deleted file mode 100644 index 9724f6a..0000000 --- a/scripts/phantom-scrape-thumbnail.js +++ /dev/null @@ -1,11 +0,0 @@ -var system = require('system'), - page = require('webpage').create(); - -page.open(system.args[1], function () { - var th = page.evaluate(function () { - return document.getElementById('thumbnail').src; - }); - console.log(th); - //console.log(JSON.stringify({ thumbnail : th })); - phantom.exit(); -}); diff --git a/scripts/my_config.js b/scripts/pjscrape-info_scrape_config.js similarity index 99% rename from scripts/my_config.js rename to scripts/pjscrape-info_scrape_config.js index 635be05..8814b4b 100644 --- a/scripts/my_config.js +++ b/scripts/pjscrape-info_scrape_config.js @@ -3803,7 +3803,7 @@ pjs.config({ timeoutLimit: 10000, format: 'json', writer: 'file', - outFile: 'scrape_output.json', + outFile: 'info_scrape_output.json', pageSettings : { loadImages : false } }); diff --git a/scripts/thumb_scrape_config.js b/scripts/pjscrape-thumb_scrape_config.js similarity index 100% rename from scripts/thumb_scrape_config.js rename to scripts/pjscrape-thumb_scrape_config.js diff --git a/scripts/scrape-id.js b/scripts/scrape-id.js deleted file mode 100644 index 596c703..0000000 --- a/scripts/scrape-id.js +++ /dev/null @@ -1,16 +0,0 @@ -var system = require('system'); -var page = require('webpage').create(); -var url_details = "http://archive.org/details/"; - -if (system.args.length === 1) { - console.log('Identifier Required'); -} else { - var id = system.args[1]; - page.open(url_details + id, function () { - var th = page.evaluate(function () { - return document.getElementById('thumbnail').src; - }); - console.log(th); - phantom.exit(); - }); -} diff --git a/views/index.ejs b/views/index.ejs index f1481d2..1e446fa 100644 --- a/views/index.ejs +++ b/views/index.ejs @@ -3,11 +3,14 @@ <%= title %> - - + + + +
loading...
+
×
@@ -60,7 +63,20 @@ -