git.quilime.com - visual-archive.git/commitdiff
Updating some scraping scripts.
author Gabriel Dunne <gdunne@quilime.com>
Sun, 7 Apr 2013 01:54:02 +0000 (18:54 -0700)
committer Gabriel Dunne <gdunne@quilime.com>
Sun, 7 Apr 2013 01:54:02 +0000 (18:54 -0700)
Procfile [new file with mode: 0644]
public/css/style.css
public/js/script.js
scripts/lines-to-json.js
scripts/phantom-scrape-from-file.js [moved from scripts/scrape-from-file.js with 100% similarity]
scripts/phantom-scrape-id.js [moved from scripts/phantom-scrape.js with 100% similarity]
scripts/phantom-scrape-thumbnail.js [deleted file]
scripts/pjscrape-info_scrape_config.js [moved from scripts/my_config.js with 99% similarity]
scripts/pjscrape-thumb_scrape_config.js [moved from scripts/thumb_scrape_config.js with 100% similarity]
scripts/scrape-id.js [deleted file]
views/index.ejs

diff --git a/Procfile b/Procfile
new file mode 100644 (file)
index 0000000..e1d4131
--- /dev/null
+++ b/Procfile
@@ -0,0 +1 @@
+web: node app.js
diff --git a/public/css/style.css b/public/css/style.css
index b9b86ea7d64137d15c24a0455ed1f3c4823188ab..6a4613f74a3b457d5f3b27d51247213bc03406ff 100644 (file)
@@ -41,6 +41,11 @@ ul.thumbs li img {
     width:160px;
     height:110px;
 }
+
+#loader {
+    margin-left:300px;
+}
+
 #scrim {
     background:rgba(0,0,0,0.8);
     width:100%;
diff --git a/public/js/script.js b/public/js/script.js
index e2b4baaba81d8c814db3b70fe74644d8c3a258f5..653c2f5b1742e56c45db2a2ef2a9a2261938018d 100644 (file)
@@ -1,27 +1,10 @@
 var extended_json = {};
 
-var on_filter_change = function() {
-  var query = $('#filter').val().toLowerCase();
-  var d = jQuery.grep(extended_json.clips, function(clip, i) {
-    if (clip.id.toLowerCase().indexOf(query) >= 0 ||
-       (clip.description && clip.description.toLowerCase().indexOf(query) >= 0))
-      return true;
-    if (clip.subject)
-      for (var j = 0; j < clip.subject.length; j++)
-        if (clip.subject[j].toLowerCase().indexOf(query) >= 0)
-          return true;
-    return false;
-  });
-  $('.thumbs li').hide();
-  for (var i = 0; i < d.length; i++) {
-    $('.thumbs li#' + d[i].id).show();
-  }
-}
-
 $(document).ready(function() {
 
-  $('#facets a').each(function(key, elem) {
+  $('#thumbs').hide();
 
+  $('#facets a').each(function(key, elem) {
     $(elem).click(function() {
       $('#filter').val($(elem).text());
       on_filter_change();
@@ -32,13 +15,12 @@ $(document).ready(function() {
     $('#filter').val("");
     on_filter_change();
   });
-  $('#filter').hide();
   $('#filter').keyup(on_filter_change);
 
-
   $.getJSON('./data/prelinger_extended-search.json', function(data) {
     extended_json = data;
-    $('#filter').show();
+    $('#loader').hide();
+    $('#thumbs').show();
   });
 
   $('#scrim').click(function() {
@@ -60,13 +42,9 @@ $(document).ready(function() {
     var thumbs_url = $(elem).attr('data-thumbs-url');
 
     link.click(function() {
-
       scrim(1, function() {
-
-        $('#subcontent .container').empty();
-
         var offset = 225;
-
+        $('#subcontent .container').empty();
         $('#subcontent').css({
             width : window.innerWidth - offset + "px",
             height : window.innerHeight - offset + "px",
@@ -74,9 +52,9 @@ $(document).ready(function() {
             left : offset / 2
           })
           .fadeIn(100);
-
         $('#subcontent .container').text("loading...");
 
+        // scrape archive.org for thumbs to insert
         $.getJSON('/thumbs/?url=' + thumbs_url, function(data) {
           $('#subcontent .container').empty();
           var destElem = '#subcontent .container';
@@ -90,15 +68,11 @@ $(document).ready(function() {
               .appendTo(destElem + ' ul.thumbs')
               .click(function(){
                 var c = $('#subcontent .container');
-
                 //thumb.url
                 c.html('<iframe id="videoplayer" src="' + thumb.url + '"></iframe>');
-
-
               })
           });
         });
-
       });
       return false;
     });
@@ -108,14 +82,33 @@ $(document).ready(function() {
     link.mouseout(function() {
       img.attr('src', 'gifs/prelinger_static/' + thumb_name);
     });
-
   });
 });
 
+
+var on_filter_change = function() {
+  var query = $('#filter').val().toLowerCase();
+  var d = jQuery.grep(extended_json.clips, function(clip, i) {
+    if (clip.id.toLowerCase().indexOf(query) >= 0 ||
+       (clip.description && clip.description.toLowerCase().indexOf(query) >= 0))
+      return true;
+    if (clip.subject)
+      for (var j = 0; j < clip.subject.length; j++)
+        if (clip.subject[j].toLowerCase().indexOf(query) >= 0)
+          return true;
+    return false;
+  });
+  $('.thumbs li').hide();
+  for (var i = 0; i < d.length; i++) {
+    $('.thumbs li#' + d[i].id).show();
+  }
+};
+
+
 var scrim = function(visible, callback) {
   var s = $('#scrim');
   if (visible == 1)
     s.fadeIn(150, callback);
   else
     s.fadeOut(150, callback);
-}
+};
diff --git a/scripts/lines-to-json.js b/scripts/lines-to-json.js
index 22ba61f7ef9968314251766205eeee1d9845977a..fe72376d9cc312f3a1ec201897b78c008b9be886 100644 (file)
@@ -3,8 +3,8 @@ var fs  = require("fs");
 
 console.log('{ identifiers : [');
 fs.readFileSync('./prelinger_identifiers.txt').toString().split('\n').forEach(
-function (line) {
-  console.log('"'+line + '",');
+       function (line) {
+       console.log('"'+line + '",');
   }
 );
 console.log(']}');
diff --git a/scripts/phantom-scrape-thumbnail.js b/scripts/phantom-scrape-thumbnail.js
deleted file mode 100644 (file)
index 9724f6a..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-var system = require('system'),
-    page = require('webpage').create();
-
-page.open(system.args[1], function () {
-    var th = page.evaluate(function () {
-        return document.getElementById('thumbnail').src;
-    });
-    console.log(th);
-    //console.log(JSON.stringify({ thumbnail : th }));
-    phantom.exit();
-});
similarity index 99%
rename from scripts/my_config.js
rename to scripts/pjscrape-info_scrape_config.js
index 635be05412379cafe035b3f28cc7146f515b800b..8814b4bad8c24f0112b8a8b6008cc433047f637f 100644 (file)
@@ -3803,7 +3803,7 @@ pjs.config({
   timeoutLimit: 10000,
   format: 'json',
   writer: 'file',
-  outFile: 'scrape_output.json',
+  outFile: 'info_scrape_output.json',
   pageSettings : { loadImages : false }
 });
 
diff --git a/scripts/scrape-id.js b/scripts/scrape-id.js
deleted file mode 100644 (file)
index 596c703..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-var system = require('system');
-var page = require('webpage').create();
-var url_details = "http://archive.org/details/";
-
-if (system.args.length === 1) {
-    console.log('Identifier Required');
-} else {
-    var id = system.args[1];
-    page.open(url_details + id, function () {
-        var th = page.evaluate(function () {
-            return document.getElementById('thumbnail').src;
-        });
-        console.log(th);
-        phantom.exit();
-    });
-}
diff --git a/views/index.ejs b/views/index.ejs
index f1481d21a010e0e43a77f7aef814c50ce1b4f6c3..1e446fae634cccc8020d73806b7a0ab2319fd7b0 100644 (file)
@@ -3,11 +3,14 @@
 <head>
   <title><%= title %></title>
   <link rel='stylesheet' href='/css/style.css' />
-  <script src="js/jquery-1.9.1.min.js"></script>
-  <script src="js/script.js"></script>
+  <script src="js/jquery-1.9.1.min.js" type="text/javascript" ></script>
+  <script src="js/jquery.paginate.js" type="text/javascript" ></script>
+  <script src="js/script.js" type="text/javascript" ></script>
 </head>
 <body>
 
+<div id="loader">loading...</div>
+
 <div id="filter-elem">
   <input type="text" id="filter" /> <a href="#" id="clear_filter">&times;</a>
 </div>
 </ul>
 </div>
 
-<ul class="thumbs">
+<!--
+
+date: "2012-07-09T20:32:09Z"
+description: "Shot of containers of chlorine compounds for swimming pool sanitation Much of it is silent CUs of a puppet show VS Italian Swiss Colony vineyards Some is A&B, some outtakes US 101 south of Petaluma driving POV Petaluma City Limit Pop. 10,315 sign Brief shot boats at Fishermen's Wharf, San Francisco with Golden Gate Bridge in background More US 101 highway driving POVs, probably Sonoma County, California California US 101 highway marker"
+id: "0924_Greatest_Name_in_Wine_The_11_00_13_15"
+thumbnail: "https://ia700608.us.archive.org/17/items/0924_Greatest_Name_in_Wine_The_11_00_13_15/0924_Greatest_Name_in_Wine_The_11_00_13_15_3mb.gif"
+thumbnail_filename: "0924_Greatest_Name_in_Wine_The_11_00_13_15_3mb.gif"
+thumbnails_url: "http://archive.org/movies/thumbnails.php?identifier=0924_Greatest_Name_in_Wine_The_11_00_13_15"
+title: "Greatest Name in Wine, The"
+url: "http://archive.org/details/0924_Greatest_Name_in_Wine_The_11_00_13_15"
+
+ -->
+
+<ul id="thumbs" class="thumbs">
   <% for(var i = 0; i < clips.length; i++) { %>
     <li id="<%= clips[i].id %>" data-thumbs-url="<%= clips[i].thumbnails_url %>">
       <a href="#">
   <% } %>
 </ul>
 
+<div id="thumbs-pagination" style="display:none">
+    <a id="thumbs-previous" href="#" class="disabled">&laquo; Previous</a>
+    <a id="thumbs-next" href="#">Next &raquo;</a>
+</div>
+
 <div id="subcontent"><div class="container"></div></div>
 <div id="scrim" />
 </body>