Tagged: drupal

Move Bill files to a session-specific directory on hook_node_insert and hook_node_update

cga_bills_diff.php
<?php
/**
 * Moves the files referenced by embedded paragraph items into a directory.
 *
 * For each paragraph item the "field_2_documents" field is tried first and
 * "field_document" second; the files of the first non-empty field are moved.
 * Files whose URI already equals the destination are left alone.
 *
 * @param string $dir_path
 *   Destination directory URI. It must end with a slash, because the file
 *   name is appended to it directly.
 * @param array $paragraph_items
 *   Paragraph item entities whose files should be moved.
 *
 * @return bool
 *   FALSE if the destination directory could not be created or made writable;
 *   TRUE otherwise. Failures of individual files are logged, not returned.
 */
function _cga_bills_move_paragraph_files($dir_path, $paragraph_items) {
  // file_prepare_directory() takes its argument by reference, so hand it a
  // copy and keep $dir_path (with its trailing slash) untouched.
  $directory = $dir_path;
  if (!file_prepare_directory($directory, FILE_CREATE_DIRECTORY | FILE_MODIFY_PERMISSIONS)) {
    watchdog("cga_bills", "could not prepare directory @path", array('@path' => $dir_path), WATCHDOG_ERROR);
    return FALSE;
  }

  if (empty($paragraph_items)) {
    return TRUE;
  }

  foreach ($paragraph_items as $pi) {
    // NOTE(review): a referenced paragraph that can no longer be loaded
    // presumably shows up as an empty entry here; skip it instead of passing
    // it on to field_get_items().
    if (empty($pi)) {
      continue;
    }

    $fields = field_get_items("paragraphs_item", $pi, "field_2_documents");
    if (!$fields) {
      $fields = field_get_items("paragraphs_item", $pi, "field_document");
    }
    if (!$fields) {
      continue;
    }

    foreach ($fields as $f2) {
      $source = empty($f2['fid']) ? FALSE : file_load($f2['fid']);
      if (!$source) {
        watchdog("cga_bills", "file @fid could not be loaded", array('@fid' => isset($f2['fid']) ? $f2['fid'] : ''), WATCHDOG_WARNING);
        continue;
      }

      // Prefer the values carried by the field item; fall back to the loaded
      // file object when the item does not provide them.
      $filename = isset($f2['filename']) ? $f2['filename'] : $source->filename;
      $current_uri = isset($f2['uri']) ? $f2['uri'] : $source->uri;
      $path = $dir_path . $filename;

      // Nothing to do when the file already lives at the destination.
      if ($current_uri == $path) {
        watchdog("cga_bills", "file already moved to @path", array('@path' => $path));
        continue;
      }

      // file_move() returns FALSE on failure (e.g. the destination already
      // exists, because of FILE_EXISTS_ERROR), so only report success when it
      // actually succeeded.
      if (file_move($source, $path, FILE_EXISTS_ERROR)) {
        watchdog("cga_bills", "file moved to @path", array('@path' => $path));
      }
      else {
        watchdog("cga_bills", "file could not be moved to @path", array('@path' => $path), WATCHDOG_ERROR);
      }
    }
  }
  return TRUE;
}
/**
 * Moves the files of a Bill node's embedded paragraphs into session folders.
 *
 * The session id is read from the "field_session_id" field of the first term
 * in the node's "field_sessions" field and used to build these destinations:
 *   - field_bill_text_files       -> public://documents/<session>/bills/
 *   - field_bill_session_laws     -> public://documents/<session>/bills/sl/
 *   - field_bill_budget_documents -> public://documents/<session>/bills/fn/
 *
 * @param object $node
 *   The Bill node whose files should be moved.
 *
 * @return bool
 *   TRUE after all three paragraph fields have been processed; FALSE when no
 *   session id could be determined, in which case nothing is moved.
 */
function _cga_bills_move_bill_files($node) {
  $wrapper = entity_metadata_wrapper('node', $node);
  $nid = isset($node->nid) ? $node->nid : '';

  // Get Session to build path. Only the first session term is used.
  // NOTE(review): the field is treated as multi-valued; a single term object
  // is accepted as well in case the field cardinality is 1 - confirm.
  $sessions = $wrapper->field_sessions->value();
  $sess = is_array($sessions) ? array_shift($sessions) : $sessions;
  if (empty($sess->tid)) {
    watchdog("cga_bills", "bill @nid has no session; files not moved", array('@nid' => $nid), WATCHDOG_WARNING);
    return FALSE;
  }

  $sess_wrapper = entity_metadata_wrapper('taxonomy_term', $sess->tid);
  $sess_key = $sess_wrapper->field_session_id->value();
  // Without a session id the destination would collapse to
  // "public://documents//bills/", so refuse to move anything.
  if (!isset($sess_key) || $sess_key === '') {
    watchdog("cga_bills", "session of bill @nid has no session id; files not moved", array('@nid' => $nid), WATCHDOG_WARNING);
    return FALSE;
  }

  // Paragraph field name => destination directory (trailing slash required by
  // _cga_bills_move_paragraph_files()).
  $destinations = array(
    // Bill Text Files paragraphs.
    'field_bill_text_files' => "public://documents/$sess_key/bills/",
    // Bill Session Laws paragraphs.
    'field_bill_session_laws' => "public://documents/$sess_key/bills/sl/",
    // Budget Files paragraphs.
    'field_bill_budget_documents' => "public://documents/$sess_key/bills/fn/",
  );
  foreach ($destinations as $field_name => $dir_path) {
    $paragraph_items = $wrapper->{$field_name}->value();
    _cga_bills_move_paragraph_files($dir_path, $paragraph_items);
  }
  return TRUE;
}
/**
 * Implements hook_node_update().
 *
 * Moves the files of an updated Bill node into its session-specific
 * directories. Nodes of any other type are ignored.
 */
function cga_bills_node_update($node) {
  if ($node->type != 'bill') {
    return;
  }
  _cga_bills_move_bill_files($node);
}
/**
 * Implements hook_node_insert().
 *
 * Moves the files of a newly inserted Bill node into its session-specific
 * directories. Nodes of any other type are ignored.
 */
function cga_bills_node_insert($node) {
  $is_bill = ($node->type == 'bill');
  if (!$is_bill) {
    return;
  }
  _cga_bills_move_bill_files($node);
}

Scrape Bill text files

scrape_bill_text.rb
require 'open-uri'
require 'nokogiri'
require 'mechanize'

# Fetches the bill search page and returns the bill numbers listed on it.
#
# Returns an Array of Strings: the combined text of every
# "div.field-name-field-bill-number div.field-items" element, split on
# whitespace.
def get_bill_nums
  # Kernel#open no longer accepts URLs on Ruby 3.0+, so open the page through
  # the URI object (open-uri adds #open to it). The block form closes the
  # stream once the document has been parsed.
  doc = URI.parse("http://leg.colorado.gov/bill-search").open { |io| Nokogiri::HTML(io) }
  doc.css("div.field-name-field-bill-number div.field-items").text.split(" ")
end

# Download the file linked from the "recent bill text" section of every bill
# page into files/bills/<bill number>.pdf. Bills whose file already exists
# locally are skipped without touching the network.
bill_nums = get_bill_nums()
puts bill_nums

# One agent is enough for the whole run.
agent = Mechanize.new

bill_nums.each do |bill_num|
  target = "files/bills/#{bill_num}.pdf"
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  next if File.exist?(target)

  # Kernel#open no longer accepts URLs on Ruby 3.0+, so go through the URI
  # object (open-uri adds #open to it); the block form closes the stream.
  doc = URI.parse("http://leg.colorado.gov/bills/#{bill_num}").open { |io| Nokogiri::HTML(io) }

  link = doc.css("div.recent-bill-text a").first
  bill_file_url = link && link["href"]
  if bill_file_url.nil? || bill_file_url.empty?
    # A bill page without a text link should not abort the whole run.
    warn "no bill text link found for #{bill_num}"
    next
  end

  agent.get(bill_file_url).save target
end